LCOV - code coverage report
Current view: top level - src/fe_utils - psqlscan.l (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 170 226 75.2 %
Date: 2024-11-21 08:14:44 Functions: 18 18 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : %top{
       2             : /*-------------------------------------------------------------------------
       3             :  *
       4             :  * psqlscan.l
       5             :  *    lexical scanner for SQL commands
       6             :  *
       7             :  * This lexer used to be part of psql, and that heritage is reflected in
       8             :  * the file name as well as function and typedef names, though it can now
       9             :  * be used by other frontend programs as well.  It's also possible to extend
      10             :  * this lexer with a compatible add-on lexer to handle program-specific
      11             :  * backslash commands.
      12             :  *
      13             :  * This code is mainly concerned with determining where the end of a SQL
      14             :  * statement is: we are looking for semicolons that are not within quotes,
      15             :  * comments, or parentheses.  The most reliable way to handle this is to
      16             :  * borrow the backend's flex lexer rules, lock, stock, and barrel.  The rules
      17             :  * below are (except for a few) the same as the backend's, but their actions
      18             :  * are just ECHO whereas the backend's actions generally do other things.
      19             :  *
      20             :  * XXX The rules in this file must be kept in sync with the backend lexer!!!
      21             :  *
      22             :  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
      23             :  *
      24             :  * See psqlscan_int.h for additional commentary.
      25             :  *
      26             :  *
      27             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      28             :  * Portions Copyright (c) 1994, Regents of the University of California
      29             :  *
      30             :  * IDENTIFICATION
      31             :  *    src/fe_utils/psqlscan.l
      32             :  *
      33             :  *-------------------------------------------------------------------------
      34             :  */
      35             : #include "postgres_fe.h"
      36             : 
      37             : #include "common/logging.h"
      38             : #include "fe_utils/psqlscan.h"
      39             : 
      40             : #include "libpq-fe.h"
      41             : }
      42             : 
      43             : %{
      44             : 
      45             : /* LCOV_EXCL_START */
      46             : 
      47             : #include "fe_utils/psqlscan_int.h"
      48             : 
      49             : /*
      50             :  * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
      51             :  * doesn't presently make use of that argument, so just declare it as int.
      52             :  */
      53             : typedef int YYSTYPE;
      54             : 
      55             : /*
      56             :  * Set the type of yyextra; we use it as a pointer back to the containing
      57             :  * PsqlScanState.
      58             :  */
      59             : #define YY_EXTRA_TYPE PsqlScanState
      60             : 
      61             : 
      62             : /* Return values from yylex() */
      63             : #define LEXRES_EOL          0   /* end of input */
      64             : #define LEXRES_SEMI         1   /* command-terminating semicolon found */
      65             : #define LEXRES_BACKSLASH    2   /* backslash command start */
      66             : 
      67             : 
      68             : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
      69             : 
      70             : /*
      71             :  * Work around a bug in flex 2.5.35: it emits a couple of functions that
      72             :  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
      73             :  * this would cause warnings.  Providing our own declarations should be
      74             :  * harmless even when the bug gets fixed.
      75             :  */
      76             : extern int  psql_yyget_column(yyscan_t yyscanner);
      77             : extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
      78             : 
      79             : %}
      80             : 
      81             : %option reentrant
      82             : %option bison-bridge
      83             : %option 8bit
      84             : %option never-interactive
      85             : %option nodefault
      86             : %option noinput
      87             : %option nounput
      88             : %option noyywrap
      89             : %option warn
      90             : %option prefix="psql_yy"
      91             : 
      92             : /*
      93             :  * All of the following definitions and rules should exactly match
      94             :  * src/backend/parser/scan.l so far as the flex patterns are concerned.
      95             :  * The rule bodies are just ECHO as opposed to what the backend does,
      96             :  * however.  (But be sure to duplicate code that affects the lexing process,
      97             :  * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
      98             :  * whereas scan.l has a separate one for each exclusive state.
      99             :  */
     100             : 
     101             : /*
     102             :  * OK, here is a short description of lex/flex rules behavior.
     103             :  * The longest pattern which matches an input string is always chosen.
     104             :  * For equal-length patterns, the first occurring in the rules list is chosen.
     105             :  * INITIAL is the starting state, to which all non-conditional rules apply.
     106             :  * Exclusive states change parsing rules while the state is active.  When in
     107             :  * an exclusive state, only those rules defined for that state apply.
     108             :  *
     109             :  * We use exclusive states for quoted strings, extended comments,
     110             :  * and to eliminate parsing troubles for numeric strings.
     111             :  * Exclusive states:
     112             :  *  <xb> bit string literal
     113             :  *  <xc> extended C-style comments
     114             :  *  <xd> delimited identifiers (double-quoted identifiers)
     115             :  *  <xh> hexadecimal byte string
     116             :  *  <xq> standard quoted strings
     117             :  *  <xqs> quote stop (detect continued strings)
     118             :  *  <xe> extended quoted strings (support backslash escape sequences)
     119             :  *  <xdolq> $foo$ quoted strings
     120             :  *  <xui> quoted identifier with Unicode escapes
     121             :  *  <xus> quoted string with Unicode escapes
     122             :  *
     123             :  * Note: we intentionally don't mimic the backend's <xeu> state; we have
     124             :  * no need to distinguish it from <xe> state, and no good way to get out
     125             :  * of it in error cases.  The backend just throws yyerror() in those
     126             :  * cases, but that's not an option here.
     127             :  */
     128             : 
     129             : %x xb
     130             : %x xc
     131             : %x xd
     132             : %x xh
     133             : %x xq
     134             : %x xqs
     135             : %x xe
     136             : %x xdolq
     137             : %x xui
     138             : %x xus
     139             : 
     140             : /*
     141             :  * In order to make the world safe for Windows and Mac clients as well as
     142             :  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
     143             :  * sequence will be seen as two successive newlines, but that doesn't cause
     144             :  * any problems.  Comments that start with -- and extend to the next
     145             :  * newline are treated as equivalent to a single whitespace character.
     146             :  *
     147             :  * NOTE a fine point: if there is no newline following --, we will absorb
     148             :  * everything to the end of the input as a comment.  This is correct.  Older
     149             :  * versions of Postgres failed to recognize -- as a comment if the input
     150             :  * did not end with a newline.
     151             :  *
     152             :  * non_newline_space tracks all space characters except newlines.
     153             :  *
     154             :  * XXX if you change the set of whitespace characters, fix scanner_isspace()
     155             :  * to agree.
     156             :  */
     157             : 
     158             : space               [ \t\n\r\f\v]
     159             : non_newline_space   [ \t\f\v]
     160             : newline             [\n\r]
     161             : non_newline         [^\n\r]
     162             : 
     163             : comment         ("--"{non_newline}*)
     164             : 
     165             : whitespace      ({space}+|{comment})
     166             : 
     167             : /*
     168             :  * SQL requires at least one newline in the whitespace separating
     169             :  * string literals that are to be concatenated.  Silly, but who are we
     170             :  * to argue?  Note that {whitespace_with_newline} should not have * after
     171             :  * it, whereas {whitespace} should generally have a * after it...
     172             :  */
     173             : 
     174             : special_whitespace      ({space}+|{comment}{newline})
     175             : non_newline_whitespace  ({non_newline_space}|{comment})
     176             : whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
     177             : 
     178             : quote           '
     179             : /* If we see {quote} then {quotecontinue}, the quoted string continues */
     180             : quotecontinue   {whitespace_with_newline}{quote}
     181             : 
     182             : /*
     183             :  * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
     184             :  * {quotecontinue}.  It might seem that this could just be {whitespace}*,
     185             :  * but if there's a dash after {whitespace_with_newline}, it must be consumed
     186             :  * to see if there's another dash --- which would start a {comment} and thus
     187             :  * allow continuation of the {quotecontinue} token.
     188             :  */
     189             : quotecontinuefail   {whitespace}*"-"?
     190             : 
     191             : /* Bit string
     192             :  * It is tempting to scan the string for only those characters
     193             :  * which are allowed. However, this leads to silently swallowed
     194             :  * characters if illegal characters are included in the string.
     195             :  * For example, if xbinside is [01] then B'ABCD' is interpreted
     196             :  * as a zero-length string, and the ABCD' is lost!
     197             :  * Better to pass the string forward and let the input routines
     198             :  * validate the contents.
     199             :  */
     200             : xbstart         [bB]{quote}
     201             : xbinside        [^']*
     202             : 
     203             : /* Hexadecimal byte string */
     204             : xhstart         [xX]{quote}
     205             : xhinside        [^']*
     206             : 
     207             : /* National character */
     208             : xnstart         [nN]{quote}
     209             : 
     210             : /* Quoted string that allows backslash escapes */
     211             : xestart         [eE]{quote}
     212             : xeinside        [^\\']+
     213             : xeescape        [\\][^0-7]
     214             : xeoctesc        [\\][0-7]{1,3}
     215             : xehexesc        [\\]x[0-9A-Fa-f]{1,2}
     216             : xeunicode       [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
     217             : xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
     218             : 
     219             : /* Extended quote
     220             :  * xqdouble implements embedded quote, ''''
     221             :  */
     222             : xqstart         {quote}
     223             : xqdouble        {quote}{quote}
     224             : xqinside        [^']+
     225             : 
     226             : /* $foo$ style quotes ("dollar quoting")
     227             :  * The quoted string starts with $foo$ where "foo" is an optional string
     228             :  * in the form of an identifier, except that it may not contain "$",
     229             :  * and extends to the first occurrence of an identical string.
     230             :  * There is *no* processing of the quoted text.
     231             :  *
     232             :  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
     233             :  * fails to match its trailing "$".
     234             :  */
     235             : dolq_start      [A-Za-z\200-\377_]
     236             : dolq_cont       [A-Za-z\200-\377_0-9]
     237             : dolqdelim       \$({dolq_start}{dolq_cont}*)?\$
     238             : dolqfailed      \${dolq_start}{dolq_cont}*
     239             : dolqinside      [^$]+
     240             : 
     241             : /* Double quote
     242             :  * Allows embedded spaces and other special characters into identifiers.
     243             :  */
     244             : dquote          \"
     245             : xdstart         {dquote}
     246             : xdstop          {dquote}
     247             : xddouble        {dquote}{dquote}
     248             : xdinside        [^"]+
     249             : 
     250             : /* Quoted identifier with Unicode escapes */
     251             : xuistart        [uU]&{dquote}
     252             : 
     253             : /* Quoted string with Unicode escapes */
     254             : xusstart        [uU]&{quote}
     255             : 
     256             : /* error rule to avoid backup */
     257             : xufailed        [uU]&
     258             : 
     259             : 
     260             : /* C-style comments
     261             :  *
     262             :  * The "extended comment" syntax closely resembles allowable operator syntax.
     263             :  * The tricky part here is to get lex to recognize a string starting with
     264             :  * slash-star as a comment, when interpreting it as an operator would produce
     265             :  * a longer match --- remember lex will prefer a longer match!  Also, if we
     266             :  * have something like plus-slash-star, lex will think this is a 3-character
     267             :  * operator whereas we want to see it as a + operator and a comment start.
     268             :  * The solution is two-fold:
     269             :  * 1. append {op_chars}* to xcstart so that it matches as much text as
     270             :  *    {operator} would. Then the tie-breaker (first matching rule of same
     271             :  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
     272             :  *    in case it contains a star-slash that should terminate the comment.
     273             :  * 2. In the operator rule, check for slash-star within the operator, and
     274             :  *    if found throw it back with yyless().  This handles the plus-slash-star
     275             :  *    problem.
     276             :  * Dash-dash comments have similar interactions with the operator rule.
     277             :  */
     278             : xcstart         \/\*{op_chars}*
     279             : xcstop          \*+\/
     280             : xcinside        [^*/]+
     281             : 
     282             : ident_start     [A-Za-z\200-\377_]
     283             : ident_cont      [A-Za-z\200-\377_0-9\$]
     284             : 
     285             : identifier      {ident_start}{ident_cont}*
     286             : 
     287             : /* Assorted special-case operators and operator-like tokens */
     288             : typecast        "::"
     289             : dot_dot         \.\.
     290             : colon_equals    ":="
     291             : 
     292             : /*
     293             :  * These operator-like tokens (unlike the above ones) also match the {operator}
     294             :  * rule, which means that they might be overridden by a longer match if they
     295             :  * are followed by a comment start or a + or - character. Accordingly, if you
     296             :  * add to this list, you must also add corresponding code to the {operator}
     297             :  * block to return the correct token in such cases. (This is not needed in
     298             :  * psqlscan.l since the token value is ignored there.)
     299             :  */
     300             : equals_greater  "=>"
     301             : less_equals     "<="
     302             : greater_equals  ">="
     303             : less_greater    "<>"
     304             : not_equals      "!="
     305             : 
     306             : /*
     307             :  * "self" is the set of chars that should be returned as single-character
     308             :  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
     309             :  * which can be one or more characters long (but if a single-char token
     310             :  * appears in the "self" set, it is not to be returned as an Op).  Note
     311             :  * that the sets overlap, but each has some chars that are not in the other.
     312             :  *
     313             :  * If you change either set, adjust the character lists appearing in the
     314             :  * rule for "operator"!
     315             :  */
     316             : self            [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
     317             : op_chars        [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
     318             : operator        {op_chars}+
     319             : 
     320             : /*
     321             :  * Numbers
     322             :  *
     323             :  * Unary minus is not part of a number here.  Instead we pass it separately to
     324             :  * the parser, and there it gets coerced via doNegate().
     325             :  *
     326             :  * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
     327             :  *
     328             :  * {realfail} is added to prevent the need for scanner
     329             :  * backup when the {real} rule fails to match completely.
     330             :  */
     331             : decdigit        [0-9]
     332             : hexdigit        [0-9A-Fa-f]
     333             : octdigit        [0-7]
     334             : bindigit        [0-1]
     335             : 
     336             : decinteger      {decdigit}(_?{decdigit})*
     337             : hexinteger      0[xX](_?{hexdigit})+
     338             : octinteger      0[oO](_?{octdigit})+
     339             : bininteger      0[bB](_?{bindigit})+
     340             : 
     341             : hexfail         0[xX]_?
     342             : octfail         0[oO]_?
     343             : binfail         0[bB]_?
     344             : 
     345             : numeric         (({decinteger}\.{decinteger}?)|(\.{decinteger}))
     346             : numericfail     {decinteger}\.\.
     347             : 
     348             : real            ({decinteger}|{numeric})[Ee][-+]?{decinteger}
     349             : realfail        ({decinteger}|{numeric})[Ee][-+]
     350             : 
     351             : /* Positional parameters don't accept underscores. */
     352             : param           \${decdigit}+
     353             : 
     354             : /*
     355             :  * An identifier immediately following an integer literal is disallowed because
     356             :  * in some cases it's ambiguous what is meant: for example, 0x1234 could be
     357             :  * either a hexinteger or a decinteger "0" and an identifier "x1234".  We can
     358             :  * detect such problems by seeing if integer_junk matches a longer substring
     359             :  * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
     360             :  * bininteger).  One "junk" pattern is sufficient because
     361             :  * {decinteger}{identifier} will match all the same strings we'd match with
     362             :  * {hexinteger}{identifier} etc.
     363             :  *
     364             :  * Note that the rule for integer_junk must appear after the ones for
     365             :  * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
     366             :  * and integer_junk, and we need hexinteger to be chosen in that case.
     367             :  *
     368             :  * Also disallow strings matched by numeric_junk, real_junk and param_junk
     369             :  * for consistency.
     370             :  */
     371             : integer_junk    {decinteger}{identifier}
     372             : numeric_junk    {numeric}{identifier}
     373             : real_junk       {real}{identifier}
     374             : param_junk      \${decdigit}+{identifier}
     375             : 
     376             : /* psql-specific: characters allowed in variable names */
     377             : variable_char   [A-Za-z\200-\377_0-9]
     378             : 
     379             : other           .
     380             : 
     381             : /*
     382             :  * Dollar quoted strings are totally opaque, and no escaping is done on them.
     383             :  * Other quoted strings must allow some special characters such as single-quote
     384             :  *  and newline.
     385             :  * Embedded single-quotes are implemented both in the SQL standard
     386             :  *  style of two adjacent single quotes "''" and in the Postgres/Java style
     387             :  *  of escaped-quote "\'".
     388             :  * Other embedded escaped characters are matched explicitly; this lexer
     389             :  *  echoes them unchanged, backslash included.
     390             :  * Note that xcstart must appear before operator, as explained above!
     391             :  *  Also whitespace (comment) must appear before operator.
     392             :  */
     393             : 
     394             : %%
     395             : 
     396             : %{
     397             :         /* Declare some local variables inside yylex(), for convenience */
     398             :         PsqlScanState cur_state = yyextra;
     399             :         PQExpBuffer output_buf = cur_state->output_buf;
     400             : 
     401             :         /*
     402             :          * Force flex into the state indicated by start_state.  This has a
     403             :          * couple of purposes: it lets some of the functions below set a new
     404             :          * starting state without ugly direct access to flex variables, and it
     405             :          * allows us to transition from one flex lexer to another so that we
     406             :          * can lex different parts of the source string using separate lexers.
     407             :          */
     408             :         BEGIN(cur_state->start_state);
     409             : %}
     410             : 
     411             : {whitespace}    {
     412             :                     /*
     413             :                      * Note that the whitespace rule includes both true
     414             :                      * whitespace and single-line ("--" style) comments.
     415             :                      * We suppress whitespace until we have collected some
     416             :                      * non-whitespace data.  (This interacts with some
     417             :                      * decisions in MainLoop(); see there for details.)
     418             :                      */
     419             :                     if (output_buf->len > 0)
     420             :                         ECHO;
     421             :                 }
     422             : 
     423             : {xcstart}       {   /* start of an outermost C-style comment */
     424             :                     cur_state->xcdepth = 0; /* no nested comments open yet */
     425             :                     BEGIN(xc);
     426             :                     /* Put back any characters past slash-star; see above */
     427             :                     yyless(2);
     428             :                     ECHO;   /* after yyless(2), emits just the slash-star */
     429             :                 }
     430             : 
     431             : <xc>{
     432             : {xcstart}       {
     433             :                     cur_state->xcdepth++;
     434             :                     /* Put back any characters past slash-star; see above */
     435             :                     yyless(2);
     436             :                     ECHO;
     437             :                 }
     438             : 
     439             : {xcstop}        {
     440             :                     if (cur_state->xcdepth <= 0)
     441             :                         BEGIN(INITIAL);
     442             :                     else
     443             :                         cur_state->xcdepth--;
     444             :                     ECHO;
     445             :                 }
     446             : 
     447             : {xcinside}      {
     448             :                     ECHO;
     449             :                 }
     450             : 
     451             : {op_chars}      {
     452             :                     ECHO;
     453             :                 }
     454             : 
     455             : \*+             {
     456             :                     ECHO;
     457             :                 }
     458             : } /* <xc> */
     459             : 
     460             : {xbstart}       {   /* b or B followed by a quote: bit string literal */
     461             :                     BEGIN(xb);
     462             :                     ECHO;
     463             :                 }
     464             : <xh>{xhinside}    |
     465             : <xb>{xbinside}    {   /* body of a hex or bit string; contents not validated */
     466             :                     ECHO;
     467             :                 }
     468             : 
     469             : {xhstart}       {
     470             :                     /* Hexadecimal byte string: x or X followed by a quote.
     471             :                      * Nothing is prepended or relabeled here (rule bodies
     472             :                      * in this lexer only echo); we simply enter the <xh>
     473             :                      * state, whose rules scan up to the closing quote, and
     474             :                      * echo the matched introducer unchanged.
     475             :                      */
     476             :                     BEGIN(xh);
     477             :                     ECHO;
     478             :                 }
     479             : 
     480             : {xnstart}       {   /* n or N followed by a quote */
     481             :                     yyless(1);  /* eat only 'n' this time */
     482             :                     ECHO;   /* the quote is rescanned as an ordinary string start */
     483             :                 }
     484             : 
     485             : {xqstart}       {   /* plain quote: string type depends on std_strings */
     486             :                     if (cur_state->std_strings)
     487             :                         BEGIN(xq);  /* standard quoted string */
     488             :                     else
     489             :                         BEGIN(xe);  /* extended string with backslash escapes */
     490             :                     ECHO;
     491             :                 }
     492             : {xestart}       {   /* e or E before the quote: always an extended string */
     493             :                     BEGIN(xe);
     494             :                     ECHO;
     495             :                 }
     496             : {xusstart}      {   /* u& or U& before the quote: string with Unicode escapes */
     497             :                     BEGIN(xus);
     498             :                     ECHO;
     499             :                 }
     500             : 
     501             : <xb,xh,xq,xe,xus>{quote} {
     502             :                     /*
     503             :                      * When we are scanning a quoted string and see an end
     504             :                      * quote, we must look ahead for a possible continuation.
     505             :                      * If we don't see one, we know the end quote was in fact
     506             :                      * the end of the string.  To reduce the lexer table size,
     507             :                      * we use a single "xqs" state to do the lookahead for all
     508             :                      * types of strings.
     509             :                      */
     510             :                     cur_state->state_before_str_stop = YYSTATE; /* re-entered if the literal continues */
     511             :                     BEGIN(xqs);
     512             :                     ECHO;
     513             :                 }
     514             : <xqs>{quotecontinue} {
     515             :                     /*
     516             :                      * Found a quote continuation, so return to the in-quote
     517             :                      * state saved in state_before_str_stop and continue
     518             :                      * scanning.  The whitespace and quote are echoed as-is.
     519             :                      */
     520             :                     BEGIN(cur_state->state_before_str_stop);
     521             :                     ECHO;
     522             :                 }
     523             : <xqs>{quotecontinuefail} |
     524             : <xqs>{other}  {
     525             :                     /*
     526             :                      * Failed to see a quote continuation.  Throw back
     527             :                      * everything after the end quote and go back to INITIAL,
     528             :                      * where that text is rescanned; the literal is complete.
     529             :                      */
     530             :                     yyless(0);
     531             :                     BEGIN(INITIAL);
     532             :                     /* There's nothing to echo ... */
     533             :                 }
     534             : 
     535             : <xq,xe,xus>{xqdouble} {   /* two adjacent quotes: an embedded quote */
     536             :                     ECHO;
     537             :                 }
     538             : <xq,xus>{xqinside}  {   /* run of characters other than a quote */
     539             :                     ECHO;
     540             :                 }
     541             : <xe>{xeinside}  {   /* run of characters other than quote and backslash */
     542             :                     ECHO;
     543             :                 }
     544             : <xe>{xeunicode} {   /* backslash-u with 4, or backslash-U with 8, hex digits */
     545             :                     ECHO;
     546             :                 }
     547             : <xe>{xeunicodefail}   {   /* same, but with too few hex digits; echoed as-is */
     548             :                     ECHO;
     549             :                 }
     550             : <xe>{xeescape}  {   /* backslash followed by any character except 0-7 */
     551             :                     ECHO;
     552             :                 }
     553             : <xe>{xeoctesc}  {   /* backslash followed by one to three octal digits */
     554             :                     ECHO;
     555             :                 }
     556             : <xe>{xehexesc}  {   /* backslash-x followed by one or two hex digits */
     557             :                     ECHO;
     558             :                 }
     559             : <xe>.         {
     560             :                     /* This is only needed for \ just before EOF */
     561             :                     ECHO;
     562             :                 }
     563             : 
     564             : {dolqdelim}     {
     565             :                     cur_state->dolqstart = pg_strdup(yytext);   /* remember the opening delimiter; freed when its match is seen */
     566             :                     BEGIN(xdolq);
     567             :                     ECHO;
     568             :                 }
     569             : {dolqfailed}    {
     570             :                     /* throw back all but the initial "$" */
     571             :                     yyless(1);
     572             :                     ECHO;
     573             :                 }
     574             : <xdolq>{dolqdelim} {
     575             :                     if (strcmp(yytext, cur_state->dolqstart) == 0)   /* closing delimiter must equal the saved opening one */
     576             :                     {
     577             :                         free(cur_state->dolqstart);
     578             :                         cur_state->dolqstart = NULL;   /* no delimiter is pending any more */
     579             :                         BEGIN(INITIAL);
     580             :                     }
     581             :                     else
     582             :                     {
     583             :                         /*
     584             :                          * When we fail to match $...$ to dolqstart, transfer
     585             :                          * the $... part to the output, but put back the final
     586             :                          * $ for rescanning.  Consider $delim$...$junk$delim$
     587             :                          */
     588             :                         yyless(yyleng - 1);
     589             :                     }
     590             :                     ECHO;   /* emits the whole delimiter, or only the part kept by yyless() above */
     591             :                 }
     592             : <xdolq>{dolqinside} {
     593             :                     ECHO;
     594             :                 }
     595             : <xdolq>{dolqfailed} {
     596             :                     ECHO;
     597             :                 }
     598             : <xdolq>.      {
     599             :                     /* This is only needed for $ inside the quoted text */
     600             :                     ECHO;
     601             :                 }
     602             : 
     603             : {xdstart}       {
     604             :                     BEGIN(xd);   /* stay in xd until the {xdstop} rule below fires */
     605             :                     ECHO;
     606             :                 }
     607             : {xuistart}      {
     608             :                     BEGIN(xui);   /* stay in xui until the {dquote} rule below fires */
     609             :                     ECHO;
     610             :                 }
     611             : <xd>{xdstop}  {
     612             :                     BEGIN(INITIAL);
     613             :                     ECHO;
     614             :                 }
     615             : <xui>{dquote} {
     616             :                     BEGIN(INITIAL);
     617             :                     ECHO;
     618             :                 }
     619             : <xd,xui>{xddouble}    {
     620             :                     ECHO;   /* emitted without leaving the current state */
     621             :                 }
     622             : <xd,xui>{xdinside}    {
     623             :                     ECHO;
     624             :                 }
     625             : 
     626             : {xufailed}  {
     627             :                     /* throw back all but the initial u/U */
     628             :                     yyless(1);
     629             :                     ECHO;   /* only that first character is emitted; the rest is rescanned */
     630             :                 }
     631             : 
     632             : {typecast}      {
     633             :                     ECHO;   /* this and the following fixed tokens get no psql-specific handling: just emit */
     634             :                 }
     635             : 
     636             : {dot_dot}       {
     637             :                     ECHO;
     638             :                 }
     639             : 
     640             : {colon_equals}  {
     641             :                     ECHO;
     642             :                 }
     643             : 
     644             : {equals_greater} {
     645             :                     ECHO;
     646             :                 }
     647             : 
     648             : {less_equals}   {
     649             :                     ECHO;
     650             :                 }
     651             : 
     652             : {greater_equals} {
     653             :                     ECHO;
     654             :                 }
     655             : 
     656             : {less_greater}  {
     657             :                     ECHO;
     658             :                 }
     659             : 
     660             : {not_equals}    {
     661             :                     ECHO;
     662             :                 }
     663             : 
     664             :     /*
     665             :      * These rules are specific to psql --- they implement parenthesis
     666             :      * counting and detection of command-ending semicolon.  These must
     667             :      * appear before the {self} rule so that they take precedence over it.
     668             :      */
     669             : 
     670             : "("               {
     671             :                     cur_state->paren_depth++;   /* consulted by the ";" rule below */
     672             :                     ECHO;
     673             :                 }
     674             : 
     675             : ")"               {
     676             :                     if (cur_state->paren_depth > 0)   /* an unmatched ")" must not drive the depth negative */
     677             :                         cur_state->paren_depth--;
     678             :                     ECHO;
     679             :                 }
     680             : 
     681             : ";"               {
     682             :                     ECHO;   /* the semicolon is emitted whether or not it ends the command */
     683             :                     if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
     684             :                     {
     685             :                         /* Terminate lexing temporarily */
     686             :                         cur_state->start_state = YY_START;
     687             :                         cur_state->identifier_count = 0;   /* the next statement's identifiers are tracked afresh */
     688             :                         return LEXRES_SEMI;
     689             :                     }
     690             :                 }
     691             : 
     692             :     /*
     693             :      * psql-specific rules to handle backslash commands and variable
     694             :      * substitution.  We want these before {self}, also.
     695             :      */
     696             : 
     697             : "\\"[;:]      {
     698             :                     /* Force a semi-colon or colon into the query buffer */
     699             :                     psqlscan_emit(cur_state, yytext + 1, 1);   /* skip the backslash, emit the one char after it */
     700             :                     if (yytext[1] == ';')
     701             :                         cur_state->identifier_count = 0;   /* same identifier-tracking reset as the plain ";" rule */
     702             :                 }
     703             : 
     704             : "\\"          {
     705             :                     /* Terminate lexing temporarily; note the backslash itself is not emitted */
     706             :                     cur_state->start_state = YY_START;
     707             :                     return LEXRES_BACKSLASH;
     708             :                 }
     709             : 
     710             : :{variable_char}+   {
     711             :                     /* Possible psql variable substitution */
     712             :                     char       *varname;
     713             :                     char       *value;
     714             : 
     715             :                     varname = psqlscan_extract_substring(cur_state,
     716             :                                                          yytext + 1,
     717             :                                                          yyleng - 1);   /* the match minus its leading colon; freed at the end of this action */
     718             :                     if (cur_state->callbacks->get_variable)   /* the callback is optional */
     719             :                         value = cur_state->callbacks->get_variable(varname,
     720             :                                                                    PQUOTE_PLAIN,
     721             :                                                                    cur_state->cb_passthrough);
     722             :                     else
     723             :                         value = NULL;   /* without a callback nothing is ever substituted */
     724             : 
     725             :                     if (value)
     726             :                     {
     727             :                         /* It is a variable, check for recursion */
     728             :                         if (psqlscan_var_is_current_source(cur_state, varname))
     729             :                         {
     730             :                             /* Recursive expansion --- don't go there */
     731             :                             pg_log_warning("skipping recursive expansion of variable \"%s\"",
     732             :                                                               varname);
     733             :                             /* Instead copy the string as is */
     734             :                             ECHO;
     735             :                         }
     736             :                         else
     737             :                         {
     738             :                             /* OK, perform substitution */
     739             :                             psqlscan_push_new_buffer(cur_state, value, varname);
     740             :                             /* yy_scan_string already made buffer active */
     741             :                         }
     742             :                         free(value);   /* the callback's result is released here in both branches */
     743             :                     }
     744             :                     else
     745             :                     {
     746             :                         /*
     747             :                          * if the variable doesn't exist we'll copy the string
     748             :                          * as is
     749             :                          */
     750             :                         ECHO;
     751             :                     }
     752             : 
     753             :                     free(varname);
     754             :                 }
     755             : 
     756             : :'{variable_char}+' {
     757             :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     758             :                                              PQUOTE_SQL_LITERAL);   /* :'name' form; the whole match, colon and quotes included, is passed on */
     759             :                 }
     760             : 
     761             : :\"{variable_char}+\" {
     762             :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     763             :                                              PQUOTE_SQL_IDENT);   /* :"name" form; same helper, different quoting request */
     764             :                 }
     765             : 
     766             : :\{\?{variable_char}+\} {
     767             :                     psqlscan_test_variable(cur_state, yytext, yyleng);   /* :{?name} form; nothing is echoed by this action itself */
     768             :                 }
     769             : 
     770             :     /*
     771             :      * These rules just avoid the need for scanner backup if one of the
     772             :      * three rules above fails to match completely.  Each one emits only
     773             :      * the colon and lets the remainder be rescanned.
     774             :      */
     775             : :'{variable_char}*  {
     776             :                     /* Throw back everything but the colon */
     777             :                     yyless(1);
     778             :                     ECHO;
     779             :                 }
     780             : 
     781             : :\"{variable_char}*    {
     782             :                     /* Throw back everything but the colon */
     783             :                     yyless(1);
     784             :                     ECHO;
     785             :                 }
     786             : 
     787             : :\{\?{variable_char}*   {
     788             :                     /* Throw back everything but the colon */
     789             :                     yyless(1);
     790             :                     ECHO;
     791             :                 }
     792             : :\{ {
     793             :                     /* Throw back everything but the colon */
     794             :                     yyless(1);
     795             :                     ECHO;
     796             :                 }
     797             : 
     798             :     /*
     799             :      * Back to backend-compatible rules.
     800             :      */
     801             : 
     802             : {self}          {
     803             :                     ECHO;   /* the psql-specific rules placed earlier take precedence over this one */
     804             :                 }
     805             : 
     806             : {operator}      {
     807             :                     /*
     808             :                      * Check for embedded slash-star or dash-dash; those
     809             :                      * are comment starts, so operator must stop there.
     810             :                      * Note that slash-star or dash-dash at the first
     811             :                      * character will match a prior rule, not this one.
     812             :                      */
     813             :                     int         nchars = yyleng;   /* how many leading chars of the match are kept as the operator */
     814             :                     char       *slashstar = strstr(yytext, "/*");
     815             :                     char       *dashdash = strstr(yytext, "--");
     816             : 
     817             :                     if (slashstar && dashdash)
     818             :                     {
     819             :                         /* if both appear, take the first one */
     820             :                         if (slashstar > dashdash)
     821             :                             slashstar = dashdash;
     822             :                     }
     823             :                     else if (!slashstar)
     824             :                         slashstar = dashdash;   /* from here on slashstar means "earliest comment start, if any" */
     825             :                     if (slashstar)
     826             :                         nchars = slashstar - yytext;   /* keep only the text before the comment start */
     827             : 
     828             :                     /*
     829             :                      * For SQL compatibility, '+' and '-' cannot be the
     830             :                      * last char of a multi-char operator unless the operator
     831             :                      * contains chars that are not in SQL operators.
     832             :                      * The idea is to lex '=-' as two operators, but not
     833             :                      * to forbid operator names like '?-' that could not be
     834             :                      * sequences of SQL operators.
     835             :                      */
     836             :                     if (nchars > 1 &&
     837             :                         (yytext[nchars - 1] == '+' ||
     838             :                          yytext[nchars - 1] == '-'))
     839             :                     {
     840             :                         int         ic;
     841             : 
     842             :                         for (ic = nchars - 2; ic >= 0; ic--)   /* search backwards through the chars before the trailing sign */
     843             :                         {
     844             :                             char c = yytext[ic];
     845             :                             if (c == '~' || c == '!' || c == '@' ||
     846             :                                 c == '#' || c == '^' || c == '&' ||
     847             :                                 c == '|' || c == '`' || c == '?' ||
     848             :                                 c == '%')
     849             :                                 break;
     850             :                         }
     851             :                         if (ic < 0)   /* the loop ran off the front without a break */
     852             :                         {
     853             :                             /*
     854             :                              * didn't find a qualifying character, so remove
     855             :                              * all trailing [+-]
     856             :                              */
     857             :                             do {
     858             :                                 nchars--;
     859             :                             } while (nchars > 1 &&
     860             :                                  (yytext[nchars - 1] == '+' ||
     861             :                                   yytext[nchars - 1] == '-'));   /* nchars > 1 guarantees at least one char survives */
     862             :                         }
     863             :                     }
     864             : 
     865             :                     if (nchars < yyleng)
     866             :                     {
     867             :                         /* Strip the unwanted chars from the token */
     868             :                         yyless(nchars);
     869             :                     }
     870             :                     ECHO;   /* emits the possibly shortened token; stripped chars are rescanned */
     871             :                 }
     872             : 
     873             : {param}         {
     874             :                     ECHO;   /* all rules from here through {real_junk} just emit; only {numericfail} trims its match first */
     875             :                 }
     876             : {param_junk}    {
     877             :                     ECHO;
     878             :                 }
     879             : 
     880             : {decinteger}    {
     881             :                     ECHO;
     882             :                 }
     883             : {hexinteger}    {
     884             :                     ECHO;
     885             :                 }
     886             : {octinteger}    {
     887             :                     ECHO;
     888             :                 }
     889             : {bininteger}    {
     890             :                     ECHO;
     891             :                 }
     892             : {hexfail}       {
     893             :                     ECHO;
     894             :                 }
     895             : {octfail}       {
     896             :                     ECHO;
     897             :                 }
     898             : {binfail}       {
     899             :                     ECHO;
     900             :                 }
     901             : {numeric}       {
     902             :                     ECHO;
     903             :                 }
     904             : {numericfail}   {
     905             :                     /* throw back the .., and treat as integer */
     906             :                     yyless(yyleng - 2);
     907             :                     ECHO;
     908             :                 }
     909             : {real}          {
     910             :                     ECHO;
     911             :                 }
     912             : {realfail}      {
     913             :                     ECHO;
     914             :                 }
     915             : {integer_junk}  {
     916             :                     ECHO;
     917             :                 }
     918             : {numeric_junk}  {
     919             :                     ECHO;
     920             :                 }
     921             : {real_junk}     {
     922             :                     ECHO;
     923             :                 }
     924             : 
     925             : 
     926             : {identifier}    {
     927             :                     /*
     928             :                      * We need to track if we are inside a BEGIN .. END block
     929             :                      * in a function definition, so that semicolons contained
     930             :                      * therein don't terminate the whole statement.  Short of
     931             :                      * writing a full parser here, the following heuristic
     932             :                      * should work.  First, we track whether the beginning of
     933             :                      * the statement matches CREATE [OR REPLACE]
     934             :                      * {FUNCTION|PROCEDURE}
     935             :                      */
     936             : 
     937             :                     if (cur_state->identifier_count == 0)   /* first identifier of a statement: forget the previous history */
     938             :                         memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
     939             : 
     940             :                     if (pg_strcasecmp(yytext, "create") == 0 ||
     941             :                         pg_strcasecmp(yytext, "function") == 0 ||
     942             :                         pg_strcasecmp(yytext, "procedure") == 0 ||
     943             :                         pg_strcasecmp(yytext, "or") == 0 ||
     944             :                         pg_strcasecmp(yytext, "replace") == 0)
     945             :                     {
     946             :                         /* record just the lowercased first letter (c, f, p, o or r), and only while a slot is left */
     947             :                         if (cur_state->identifier_count < sizeof(cur_state->identifiers))
     948             :                             cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);
     949             :                     }
     950             : 
     951             :                     cur_state->identifier_count++;   /* counts every identifier, recorded or not, so other words leave a zero slot */
     952             : 
     953             :                     if (cur_state->identifiers[0] == 'c' &&
     954             :                         (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
     955             :                          (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
     956             :                           (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
     957             :                         cur_state->paren_depth == 0)   /* letter patterns c,f / c,p / c,o,r,f / c,o,r,p and not inside parentheses */
     958             :                     {
     959             :                         if (pg_strcasecmp(yytext, "begin") == 0)
     960             :                             cur_state->begin_depth++;
     961             :                         else if (pg_strcasecmp(yytext, "case") == 0)
     962             :                         {
     963             :                             /*
     964             :                              * CASE also ends with END.  We only need to track
     965             :                              * this if we are already inside a BEGIN.
     966             :                              */
     967             :                             if (cur_state->begin_depth >= 1)
     968             :                                 cur_state->begin_depth++;
     969             :                         }
     970             :                         else if (pg_strcasecmp(yytext, "end") == 0)
     971             :                         {
     972             :                             if (cur_state->begin_depth > 0)   /* a stray END must not make the depth negative */
     973             :                                 cur_state->begin_depth--;
     974             :                         }
     975             :                     }
     976             : 
     977             :                     ECHO;   /* the identifier is always emitted, whatever bookkeeping happened above */
     978             :                 }
     978             : 
     979             : {other}         {
     980             :                     ECHO;
     981             :                 }
     982             : 
     983             : <<EOF>>         {
     984             :                     if (cur_state->buffer_stack == NULL)   /* empty stack: this EOF is not the end of a variable expansion */
     985             :                     {
     986             :                         cur_state->start_state = YY_START;   /* psql_scan() inspects this to classify the end of line */
     987             :                         return LEXRES_EOL;      /* end of input reached */
     988             :                     }
     989             : 
     990             :                     /*
     991             :                      * We were expanding a variable, so pop the inclusion
     992             :                      * stack and keep lexing (no return: the scanner simply
     993             :                      * continues with whatever buffer is selected next)
     994             :                      */
     995             :                     psqlscan_pop_buffer_stack(cur_state);
     996             :                     psqlscan_select_top_buffer(cur_state);
     997             :                 }
     997             : 
     998             : %%
     999             : 
    1000             : /* LCOV_EXCL_STOP */
    1001             : 
    1002             : /*
    1003             :  * Create a lexer working state struct and return it; psql_scan_destroy() releases it.
    1004             :  *
    1005             :  * callbacks is a struct of function pointers that encapsulate some
    1006             :  * behavior we need from the surrounding program.  This struct must
    1007             :  * remain valid for the lifespan of the PsqlScanState (only the pointer is stored).
    1008             :  */
    1009             : PsqlScanState
    1010             : psql_scan_create(const PsqlScanCallbacks *callbacks)
    1011       18380 : {
    1012             :     PsqlScanState state;
    1013             : 
    1014             :     state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));   /* pg_malloc0: zero-filled allocation */
    1015       18380 : 
    1016             :     state->callbacks = callbacks;
    1017       18380 : 
    1018             :     yylex_init(&state->scanner);   /* create the flex scanner object owned by this state */
    1019       18380 : 
    1020             :     yyset_extra(state, state->scanner);   /* register state as the scanner's user-defined "extra" data */
    1021       18380 : 
    1022             :     psql_scan_reset(state);
    1023       18380 : 
    1024             :     return state;
    1025       18380 : }
    1026             : 
    1027             : /*
    1028             :  * Destroy a lexer working state struct, releasing all resources.  The pointer must not be used afterwards.
    1029             :  */
    1030             : void
    1031             : psql_scan_destroy(PsqlScanState state)
    1032       18278 : {
    1033             :     psql_scan_finish(state);
    1034       18278 : 
    1035             :     psql_scan_reset(state);
    1036       18278 : 
    1037             :     yylex_destroy(state->scanner);   /* counterpart of the yylex_init() call in psql_scan_create() */
    1038       18278 : 
    1039             :     free(state);   /* the struct itself goes last, after everything hanging off it */
    1040       18278 : }
    1041       18278 : 
    1042             : /*
    1043             :  * Set the callback passthrough pointer for the lexer.
    1044             :  *
    1045             :  * This could have been integrated into psql_scan_create, but keeping it
    1046             :  * separate allows the application to change the pointer later, which might
    1047             :  * be useful.
    1048             :  */
    1049             : void
    1050             : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
    1051       17840 : {
    1052             :     state->cb_passthrough = passthrough;   /* only stored here; the lexer hands it to callbacks such as get_variable */
    1053       17840 : }
    1054       17840 : 
    1055             : /*
    1056             :  * Set up to perform lexing of the given input line.
    1057             :  *
    1058             :  * The text at *line, extending for line_len bytes, will be scanned by
    1059             :  * subsequent calls to the psql_scan routines.  psql_scan_finish should
    1060             :  * be called when scanning is complete.  Note that the lexer retains
    1061             :  * a pointer to the storage at *line --- this string must not be altered
    1062             :  * or freed until after psql_scan_finish is called.
    1063             :  *
    1064             :  * encoding is the libpq identifier for the character encoding in use,
    1065             :  * and std_strings says whether standard_conforming_strings is on.
    1066             :  */
    1067             : void
    1068             : psql_scan_setup(PsqlScanState state,
    1069      626798 :                 const char *line, int line_len,
    1070             :                 int encoding, bool std_strings)
    1071             : {
    1072             :     /* Mustn't be scanning already: no input buffer and no pending variable-expansion buffers */
    1073             :     Assert(state->scanbufhandle == NULL);
    1074             :     Assert(state->buffer_stack == NULL);
    1075             : 
    1076             :     /* Do we need to hack the character set encoding? */
    1077             :     state->encoding = encoding;
    1078      626798 :     state->safe_encoding = pg_valid_server_encoding_id(encoding);
    1079      626798 : 
    1080             :     /* Save standard-strings flag as well */
    1081             :     state->std_strings = std_strings;
    1082      626798 : 
    1083             :     /* Set up flex input buffer with appropriate translation and padding */
    1084             :     state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
    1085      626798 :                                                    &state->scanbuf);
    1086             :     state->scanline = line;   /* the caller's pointer is kept, hence the lifetime rule in the header comment */
    1087      626798 : 
    1088             :     /* Set lookaside data in case we have to map unsafe encoding */
    1089             :     state->curline = state->scanbuf;
    1090      626798 :     state->refline = state->scanline;
    1091      626798 : }
    1092      626798 : 
    1093             : /*
    1094             :  * Do lexical analysis of SQL command text.
    1095             :  *
    1096             :  * The text previously passed to psql_scan_setup is scanned, and appended
    1097             :  * (possibly with transformation) to query_buf.
    1098             :  *
    1099             :  * The return value indicates the condition that stopped scanning:
    1100             :  *
    1101             :  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
    1102             :  * transferred to query_buf.)  The command accumulated in query_buf should
    1103             :  * be executed, then clear query_buf and call again to scan the remainder
    1104             :  * of the line.
    1105             :  *
    1106             :  * PSCAN_BACKSLASH: found a backslash that starts a special command.
    1107             :  * Any previous data on the line has been transferred to query_buf.
    1108             :  * The caller will typically next apply a separate flex lexer to scan
    1109             :  * the special command.
    1110             :  *
    1111             :  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
    1112             :  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
    1113             :  *
    1114             :  * PSCAN_EOL: the end of the line was reached, and there is no lexical
    1115             :  * reason to consider the command incomplete.  The caller may or may not
    1116             :  * choose to send it.  *prompt is set to the appropriate prompt type if
    1117             :  * the caller chooses to collect more input.
    1118             :  *
    1119             :  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
    1120             :  * be called next, then the cycle may be repeated with a fresh input line.
    1121             :  *
    1122             :  * In all cases, *prompt is set to an appropriate prompt type code for the
    1123             :  * next line-input operation.
    1124             :  */
    1125             : PsqlScanResult
    1126             : psql_scan(PsqlScanState state,
    1127      957702 :           PQExpBuffer query_buf,
    1128             :           promptStatus_t *prompt)
    1129             : {
    1130             :     PsqlScanResult result;
    1131             :     int         lexresult;
    1132             : 
    1133             :     /* Must be scanning already */
    1134             :     Assert(state->scanbufhandle != NULL);
    1135             : 
    1136             :     /* Set current output target */
    1137             :     state->output_buf = query_buf;
    1138      957702 : 
    1139             :     /* Set input source */
    1140             :     if (state->buffer_stack != NULL)
    1141      957702 :         yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
    1142          90 :     else
    1143             :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1144      957612 : 
    1145             :     /* And lex. */
    1146             :     lexresult = yylex(NULL, state->scanner);
    1147      957702 : 
    1148             :     /*
    1149             :      * Check termination state and return appropriate result info.
    1150             :      */
    1151             :     switch (lexresult)
    1152      957702 :     {
    1153             :         case LEXRES_EOL:        /* end of input */
    1154      626546 :             switch (state->start_state)
    1155      626546 :             {
    1156             :                 case INITIAL:
    1157      579730 :                 case xqs:       /* we treat this like INITIAL */
    1158             :                     if (state->paren_depth > 0)
    1159      579730 :                     {
    1160             :                         result = PSCAN_INCOMPLETE;
    1161       47784 :                         *prompt = PROMPT_PAREN;
    1162       47784 :                     }
    1163             :                     else if (state->begin_depth > 0)
    1164      531946 :                     {
    1165             :                         result = PSCAN_INCOMPLETE;
    1166         452 :                         *prompt = PROMPT_CONTINUE;
    1167         452 :                     }
    1168             :                     else if (query_buf->len > 0)
    1169      531494 :                     {
    1170             :                         result = PSCAN_EOL;
    1171      105890 :                         *prompt = PROMPT_CONTINUE;
    1172      105890 :                     }
    1173             :                     else
    1174             :                     {
    1175             :                         /* never bother to send an empty buffer */
    1176             :                         result = PSCAN_INCOMPLETE;
    1177      425604 :                         *prompt = PROMPT_READY;
    1178      425604 :                     }
    1179             :                     break;
    1180      579730 :                 case xb:
    1181           0 :                     result = PSCAN_INCOMPLETE;
    1182           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1183           0 :                     break;
    1184           0 :                 case xc:
    1185         794 :                     result = PSCAN_INCOMPLETE;
    1186         794 :                     *prompt = PROMPT_COMMENT;
    1187         794 :                     break;
    1188         794 :                 case xd:
    1189          26 :                     result = PSCAN_INCOMPLETE;
    1190          26 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1191          26 :                     break;
    1192          26 :                 case xh:
    1193           0 :                     result = PSCAN_INCOMPLETE;
    1194           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1195           0 :                     break;
    1196           0 :                 case xe:
    1197         602 :                     result = PSCAN_INCOMPLETE;
    1198         602 :                     *prompt = PROMPT_SINGLEQUOTE;
    1199         602 :                     break;
    1200         602 :                 case xq:
    1201        9502 :                     result = PSCAN_INCOMPLETE;
    1202        9502 :                     *prompt = PROMPT_SINGLEQUOTE;
    1203        9502 :                     break;
    1204        9502 :                 case xdolq:
    1205       35892 :                     result = PSCAN_INCOMPLETE;
    1206       35892 :                     *prompt = PROMPT_DOLLARQUOTE;
    1207       35892 :                     break;
    1208       35892 :                 case xui:
    1209           0 :                     result = PSCAN_INCOMPLETE;
    1210           0 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1211           0 :                     break;
    1212           0 :                 case xus:
    1213           0 :                     result = PSCAN_INCOMPLETE;
    1214           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1215           0 :                     break;
    1216           0 :                 default:
    1217           0 :                     /* can't get here */
    1218             :                     fprintf(stderr, "invalid YY_START\n");
    1219           0 :                     exit(1);
    1220           0 :             }
    1221             :             break;
    1222      626546 :         case LEXRES_SEMI:       /* semicolon */
    1223      316356 :             result = PSCAN_SEMICOLON;
    1224      316356 :             *prompt = PROMPT_READY;
    1225      316356 :             break;
    1226      316356 :         case LEXRES_BACKSLASH:  /* backslash */
    1227       14800 :             result = PSCAN_BACKSLASH;
    1228       14800 :             *prompt = PROMPT_READY;
    1229       14800 :             break;
    1230       14800 :         default:
    1231           0 :             /* can't get here */
    1232             :             fprintf(stderr, "invalid yylex result\n");
    1233           0 :             exit(1);
    1234           0 :     }
    1235             : 
    1236             :     return result;
    1237      957702 : }
    1238             : 
    1239             : /*
    1240             :  * Clean up after scanning a string.  This flushes any unread input and
    1241             :  * releases resources (but not the PsqlScanState itself).  Note however
    1242             :  * that this does not reset the lexer scan state; that can be done by
    1243             :  * psql_scan_reset(), which is an orthogonal operation.
    1244             :  *
    1245             :  * It is legal to call this when not scanning anything (makes it easier
    1246             :  * to deal with error recovery).
    1247             :  */
    1248             : void
    1249             : psql_scan_finish(PsqlScanState state)
    1250      644972 : {
    1251             :     /* Drop any incomplete variable expansions. */
    1252             :     while (state->buffer_stack != NULL)
    1253      644972 :         psqlscan_pop_buffer_stack(state);
    1254           0 : 
    1255             :     /* Done with the outer scan buffer, too */
    1256             :     if (state->scanbufhandle)
    1257      644972 :         yy_delete_buffer(state->scanbufhandle, state->scanner);
    1258      626698 :     state->scanbufhandle = NULL;
    1259      644972 :     if (state->scanbuf)
    1260      644972 :         free(state->scanbuf);
    1261      626698 :     state->scanbuf = NULL;
    1262      644972 : }
    1263      644972 : 
    1264             : /*
    1265             :  * Reset lexer scanning state to start conditions.  This is appropriate
    1266             :  * for executing \r psql commands (or any other time that we discard the
    1267             :  * prior contents of query_buf).  It is not, however, necessary to do this
    1268             :  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
    1269             :  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
    1270             :  * conditions are returned.
    1271             :  *
    1272             :  * Note that this is unrelated to flushing unread input; that task is
    1273             :  * done by psql_scan_finish().
    1274             :  */
    1275             : void
    1276             : psql_scan_reset(PsqlScanState state)
    1277       37992 : {
    1278             :     state->start_state = INITIAL;
    1279       37992 :     state->paren_depth = 0;
    1280       37992 :     state->xcdepth = 0;          /* not really necessary */
    1281       37992 :     if (state->dolqstart)
    1282       37992 :         free(state->dolqstart);
    1283           0 :     state->dolqstart = NULL;
    1284       37992 :     state->identifier_count = 0;
    1285       37992 :     state->begin_depth = 0;
    1286       37992 : }
    1287       37992 : 
    1288             : /*
    1289             :  * Reselect this lexer (psqlscan.l) after using another one.
    1290             :  *
    1291             :  * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
    1292             :  * state, because we'd never switch to another lexer in a different state.
    1293             :  * However, we don't want to reset e.g. paren_depth, so this can't be
    1294             :  * the same as psql_scan_reset().
    1295             :  *
    1296             :  * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
    1297             :  * must be a superset of this.
    1298             :  *
    1299             :  * Note: it seems likely that other lexers could just assign INITIAL for
    1300             :  * themselves, since that probably has the value zero in every flex-generated
    1301             :  * lexer.  But let's not assume that.
    1302             :  */
    1303             : void
    1304             : psql_scan_reselect_sql_lexer(PsqlScanState state)
    1305       66388 : {
    1306             :     state->start_state = INITIAL;
    1307       66388 : }
    1308       66388 : 
    1309             : /*
    1310             :  * Return true if lexer is currently in an "inside quotes" state.
    1311             :  *
    1312             :  * This is pretty grotty but is needed to preserve the old behavior
    1313             :  * that mainloop.c drops blank lines not inside quotes without even
    1314             :  * echoing them.
    1315             :  */
    1316             : bool
    1317             : psql_scan_in_quote(PsqlScanState state)
    1318      130406 : {
    1319             :     return state->start_state != INITIAL &&
    1320      131368 :             state->start_state != xqs;
    1321         962 : }
    1322             : 
    1323             : /*
    1324             :  * Push the given string onto the stack of stuff to scan.
    1325             :  *
    1326             :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1327             :  */
    1328             : void
    1329             : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
    1330        1214 :                          const char *varname)
    1331             : {
    1332             :     StackElem  *stackelem;
    1333             : 
    1334             :     stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
    1335        1214 : 
    1336             :     /*
    1337             :      * In current usage, the passed varname points at the current flex input
    1338             :      * buffer; we must copy it before calling psqlscan_prepare_buffer()
    1339             :      * because that will change the buffer state.
    1340             :      */
    1341             :     stackelem->varname = varname ? pg_strdup(varname) : NULL;
    1342        1214 : 
    1343             :     stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
    1344        1214 :                                              &stackelem->bufstring);
    1345             :     state->curline = stackelem->bufstring;
    1346        1214 :     if (state->safe_encoding)
    1347        1214 :     {
    1348             :         stackelem->origstring = NULL;
    1349        1214 :         state->refline = stackelem->bufstring;
    1350        1214 :     }
    1351             :     else
    1352             :     {
    1353             :         stackelem->origstring = pg_strdup(newstr);
    1354           0 :         state->refline = stackelem->origstring;
    1355           0 :     }
    1356             :     stackelem->next = state->buffer_stack;
    1357        1214 :     state->buffer_stack = stackelem;
    1358        1214 : }
    1359        1214 : 
    1360             : /*
    1361             :  * Pop the topmost buffer stack item (there must be one!)
    1362             :  *
    1363             :  * NB: after this, the flex input state is unspecified; caller must
    1364             :  * switch to an appropriate buffer to continue lexing.
    1365             :  * See psqlscan_select_top_buffer().
    1366             :  */
    1367             : void
    1368             : psqlscan_pop_buffer_stack(PsqlScanState state)
    1369        1214 : {
    1370             :     StackElem  *stackelem = state->buffer_stack;
    1371        1214 : 
    1372             :     state->buffer_stack = stackelem->next;
    1373        1214 :     yy_delete_buffer(stackelem->buf, state->scanner);
    1374        1214 :     free(stackelem->bufstring);
    1375        1214 :     if (stackelem->origstring)
    1376        1214 :         free(stackelem->origstring);
    1377           0 :     if (stackelem->varname)
    1378        1214 :         free(stackelem->varname);
    1379        1214 :     free(stackelem);
    1380        1214 : }
    1381        1214 : 
    1382             : /*
    1383             :  * Select the topmost surviving buffer as the active input.
    1384             :  */
    1385             : void
    1386             : psqlscan_select_top_buffer(PsqlScanState state)
    1387        1214 : {
    1388             :     StackElem  *stackelem = state->buffer_stack;
    1389        1214 : 
    1390             :     if (stackelem != NULL)
    1391        1214 :     {
    1392             :         yy_switch_to_buffer(stackelem->buf, state->scanner);
    1393           0 :         state->curline = stackelem->bufstring;
    1394           0 :         state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
    1395           0 :     }
    1396             :     else
    1397             :     {
    1398             :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1399        1214 :         state->curline = state->scanbuf;
    1400        1214 :         state->refline = state->scanline;
    1401        1214 :     }
    1402             : }
    1403        1214 : 
    1404             : /*
    1405             :  * Check if specified variable name is the source for any string
    1406             :  * currently being scanned
    1407             :  */
    1408             : bool
    1409             : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
    1410        1214 : {
    1411             :     StackElem  *stackelem;
    1412             : 
    1413             :     for (stackelem = state->buffer_stack;
    1414        1214 :          stackelem != NULL;
    1415             :          stackelem = stackelem->next)
    1416           0 :     {
    1417             :         if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
    1418           0 :             return true;
    1419           0 :     }
    1420             :     return false;
    1421        1214 : }
    1422             : 
    1423             : /*
    1424             :  * Set up a flex input buffer to scan the given data.  We always make a
    1425             :  * copy of the data.  If working in an unsafe encoding, the copy has
    1426             :  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
    1427             :  *
    1428             :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1429             :  */
    1430             : YY_BUFFER_STATE
    1431             : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
    1432      628012 :                         char **txtcopy)
    1433             : {
    1434             :     char       *newtxt;
    1435             : 
    1436             :     /* Flex wants two \0 characters after the actual data */
    1437             :     newtxt = pg_malloc(len + 2);
    1438      628012 :     *txtcopy = newtxt;
    1439      628012 :     newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
    1440      628012 : 
    1441             :     if (state->safe_encoding)
    1442      628012 :         memcpy(newtxt, txt, len);
    1443      628012 :     else
    1444             :     {
    1445             :         /* Gotta do it the hard way */
    1446             :         int         i = 0;
    1447           0 : 
    1448             :         while (i < len)
    1449           0 :         {
    1450             :             int         thislen = PQmblen(txt + i, state->encoding);
    1451           0 : 
    1452             :             /* first byte should always be okay... */
    1453             :             newtxt[i] = txt[i];
    1454           0 :             i++;
    1455           0 :             while (--thislen > 0 && i < len)
    1456           0 :                 newtxt[i++] = (char) 0xFF;
    1457           0 :         }
    1458             :     }
    1459             : 
    1460             :     return yy_scan_buffer(newtxt, len + 2, state->scanner);
    1461      628012 : }
    1462             : 
    1463             : /*
    1464             :  * psqlscan_emit() --- body for ECHO macro
    1465             :  *
    1466             :  * NB: this must be used for ALL and ONLY the text copied from the flex
    1467             :  * input data.  If you pass it something that is not part of the yytext
    1468             :  * string, you are making a mistake.  Internally generated text can be
    1469             :  * appended directly to state->output_buf.
    1470             :  */
    1471             : void
    1472             : psqlscan_emit(PsqlScanState state, const char *txt, int len)
    1473     7507080 : {
    1474             :     PQExpBuffer output_buf = state->output_buf;
    1475     7507080 : 
    1476             :     if (state->safe_encoding)
    1477     7507080 :         appendBinaryPQExpBuffer(output_buf, txt, len);
    1478     7507080 :     else
    1479             :     {
    1480             :         /* Gotta do it the hard way */
    1481             :         const char *reference = state->refline;
    1482           0 :         int         i;
    1483             : 
    1484             :         reference += (txt - state->curline);
    1485           0 : 
    1486             :         for (i = 0; i < len; i++)
    1487           0 :         {
    1488             :             char        ch = txt[i];
    1489           0 : 
    1490             :             if (ch == (char) 0xFF)
    1491           0 :                 ch = reference[i];
    1492           0 :             appendPQExpBufferChar(output_buf, ch);
    1493           0 :         }
    1494             :     }
    1495             : }
    1496     7507080 : 
    1497             : /*
    1498             :  * psqlscan_extract_substring --- fetch value of (part of) the current token
    1499             :  *
    1500             :  * This is like psqlscan_emit(), except that the data is returned as a
    1501             :  * malloc'd string rather than being pushed directly to state->output_buf.
    1502             :  */
    1503             : char *
    1504             : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
    1505        4826 : {
    1506             :     char       *result = (char *) pg_malloc(len + 1);
    1507        4826 : 
    1508             :     if (state->safe_encoding)
    1509        4826 :         memcpy(result, txt, len);
    1510        4826 :     else
    1511             :     {
    1512             :         /* Gotta do it the hard way */
    1513             :         const char *reference = state->refline;
    1514           0 :         int         i;
    1515             : 
    1516             :         reference += (txt - state->curline);
    1517           0 : 
    1518             :         for (i = 0; i < len; i++)
    1519           0 :         {
    1520             :             char        ch = txt[i];
    1521           0 : 
    1522             :             if (ch == (char) 0xFF)
    1523           0 :                 ch = reference[i];
    1524           0 :             result[i] = ch;
    1525           0 :         }
    1526             :     }
    1527             :     result[len] = '\0';
    1528        4826 :     return result;
    1529        4826 : }
    1530             : 
    1531             : /*
    1532             :  * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
    1533             :  *
    1534             :  * If the variable name is found, escape its value using the appropriate
    1535             :  * quoting method and emit the value to output_buf.  (Since the result is
    1536             :  * surely quoted, there is never any reason to rescan it.)  If we don't
    1537             :  * find the variable or escaping fails, emit the token as-is.
    1538             :  */
    1539             : void
    1540             : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
    1541         870 :                          PsqlScanQuoteType quote)
    1542             : {
    1543             :     char       *varname;
    1544             :     char       *value;
    1545             : 
    1546             :     /* Variable lookup. */
    1547             :     varname = psqlscan_extract_substring(state, txt + 2, len - 3);
    1548         870 :     if (state->callbacks->get_variable)
    1549         870 :         value = state->callbacks->get_variable(varname, quote,
    1550         870 :                                                state->cb_passthrough);
    1551             :     else
    1552             :         value = NULL;
    1553           0 :     free(varname);
    1554         870 : 
    1555             :     if (value)
    1556         870 :     {
    1557             :         /* Emit the suitably-escaped value */
    1558             :         appendPQExpBufferStr(state->output_buf, value);
    1559         814 :         free(value);
    1560         814 :     }
    1561             :     else
    1562             :     {
    1563             :         /* Emit original token as-is */
    1564             :         psqlscan_emit(state, txt, len);
    1565          56 :     }
    1566             : }
    1567         870 : 
    1568             : void
    1569             : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
    1570          32 : {
    1571             :     char    *varname;
    1572             :     char    *value;
    1573             : 
    1574             :     varname = psqlscan_extract_substring(state, txt + 3, len - 4);
    1575          32 :     if (state->callbacks->get_variable)
    1576          32 :         value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
    1577          32 :                                                state->cb_passthrough);
    1578             :     else
    1579             :         value = NULL;
    1580           0 :     free(varname);
    1581          32 : 
    1582             :     if (value != NULL)
    1583          32 :     {
    1584             :         psqlscan_emit(state, "TRUE", 4);
    1585          14 :         free(value);
    1586          14 :     }
    1587             :     else
    1588             :     {
    1589             :         psqlscan_emit(state, "FALSE", 5);
    1590          18 :     }
    1591             : }

Generated by: LCOV version 1.14