LCOV - code coverage report
Current view: top level - src/fe_utils - psqlscan.l (source / functions)
Test: PostgreSQL 18devel
Date: 2025-01-18 03:14:54
Coverage: Lines 170 hit / 226 total (75.2 %); Functions 18 hit / 18 total (100.0 %)
Legend: Lines: hit not hit

          Line data    Source code
       1             : %top{
       2             : /*-------------------------------------------------------------------------
       3             :  *
       4             :  * psqlscan.l
       5             :  *    lexical scanner for SQL commands
       6             :  *
       7             :  * This lexer used to be part of psql, and that heritage is reflected in
       8             :  * the file name as well as function and typedef names, though it can now
       9             :  * be used by other frontend programs as well.  It's also possible to extend
      10             :  * this lexer with a compatible add-on lexer to handle program-specific
      11             :  * backslash commands.
      12             :  *
      13             :  * This code is mainly concerned with determining where the end of a SQL
      14             :  * statement is: we are looking for semicolons that are not within quotes,
      15             :  * comments, or parentheses.  The most reliable way to handle this is to
      16             :  * borrow the backend's flex lexer rules, lock, stock, and barrel.  The rules
      17             :  * below are (except for a few) the same as the backend's, but their actions
      18             :  * are just ECHO whereas the backend's actions generally do other things.
      19             :  *
      20             :  * XXX The rules in this file must be kept in sync with the backend lexer!!!
      21             :  *
      22             :  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
      23             :  *
      24             :  * See psqlscan_int.h for additional commentary.
      25             :  *
      26             :  *
      27             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      28             :  * Portions Copyright (c) 1994, Regents of the University of California
      29             :  *
      30             :  * IDENTIFICATION
      31             :  *    src/fe_utils/psqlscan.l
      32             :  *
      33             :  *-------------------------------------------------------------------------
      34             :  */
      35             : #include "postgres_fe.h"
      36             : 
      37             : #include "common/logging.h"
      38             : #include "fe_utils/psqlscan.h"
      39             : 
      40             : #include "libpq-fe.h"
      41             : }
      42             : 
      43             : %{
      44             : /* The marker comment below opens a region that lcov leaves out of coverage accounting. */
      45             : /* LCOV_EXCL_START */
      46             : 
      47             : #include "fe_utils/psqlscan_int.h"
      48             : 
      49             : /*
      50             :  * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
      51             :  * doesn't presently make use of that argument, so just declare it as int.
      52             :  */
      53             : typedef int YYSTYPE;
      54             : 
      55             : 
      56             : /* Return values from yylex() */
      57             : #define LEXRES_EOL          0   /* end of input */
      58             : #define LEXRES_SEMI         1   /* command-terminating semicolon found */
      59             : #define LEXRES_BACKSLASH    2   /* backslash command start */
      60             : 
      61             : /* ECHO passes the matched text (yytext, yyleng) to psqlscan_emit(); it relies on the cur_state local of yylex(). */
      62             : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
      63             : 
      64             : %}
      65             : 
      66             : %option reentrant
      67             : %option bison-bridge
      68             : %option 8bit
      69             : %option never-interactive
      70             : %option nodefault
      71             : %option noinput
      72             : %option nounput
      73             : %option noyywrap
      74             : %option warn
      75             : %option prefix="psql_yy"
      76             : 
      77             : /*
      78             :  * Set the type of yyextra; we use it as a pointer back to the containing
      79             :  * PsqlScanState.
      80             :  */
      81             : %option extra-type="PsqlScanState"
      82             : 
      83             : /*
      84             :  * All of the following definitions and rules should exactly match
      85             :  * src/backend/parser/scan.l so far as the flex patterns are concerned.
      86             :  * The rule bodies are just ECHO as opposed to what the backend does,
      87             :  * however.  (But be sure to duplicate code that affects the lexing process,
      88             :  * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
      89             :  * whereas scan.l has a separate one for each exclusive state.
      90             :  */
      91             : 
      92             : /*
      93             :  * OK, here is a short description of lex/flex rules behavior.
      94             :  * The longest pattern which matches an input string is always chosen.
      95             :  * For equal-length patterns, the first occurring in the rules list is chosen.
      96             :  * INITIAL is the starting state, to which all non-conditional rules apply.
      97             :  * Exclusive states change parsing rules while the state is active.  When in
      98             :  * an exclusive state, only those rules defined for that state apply.
      99             :  *
     100             :  * We use exclusive states for quoted strings, extended comments,
     101             :  * and to eliminate parsing troubles for numeric strings.
     102             :  * Exclusive states:
     103             :  *  <xb> bit string literal
     104             :  *  <xc> extended C-style comments
     105             :  *  <xd> delimited identifiers (double-quoted identifiers)
     106             :  *  <xh> hexadecimal byte string
     107             :  *  <xq> standard quoted strings
     108             :  *  <xqs> quote stop (detect continued strings)
     109             :  *  <xe> extended quoted strings (support backslash escape sequences)
     110             :  *  <xdolq> $foo$ quoted strings
     111             :  *  <xui> quoted identifier with Unicode escapes
     112             :  *  <xus> quoted string with Unicode escapes
     113             :  *
     114             :  * Note: we intentionally don't mimic the backend's <xeu> state; we have
     115             :  * no need to distinguish it from <xe> state, and no good way to get out
     116             :  * of it in error cases.  The backend just throws yyerror() in those
     117             :  * cases, but that's not an option here.
     118             :  */
     119             : 
     120             : %x xb
     121             : %x xc
     122             : %x xd
     123             : %x xh
     124             : %x xq
     125             : %x xqs
     126             : %x xe
     127             : %x xdolq
     128             : %x xui
     129             : %x xus
     130             : 
     131             : /*
     132             :  * In order to make the world safe for Windows and Mac clients as well as
     133             :  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
     134             :  * sequence will be seen as two successive newlines, but that doesn't cause
     135             :  * any problems.  Comments that start with -- and extend to the next
     136             :  * newline are treated as equivalent to a single whitespace character.
     137             :  *
     138             :  * NOTE a fine point: if there is no newline following --, we will absorb
     139             :  * everything to the end of the input as a comment.  This is correct.  Older
     140             :  * versions of Postgres failed to recognize -- as a comment if the input
     141             :  * did not end with a newline.
     142             :  *
     143             :  * non_newline_space tracks all space characters except newlines.
     144             :  *
     145             :  * XXX if you change the set of whitespace characters, fix scanner_isspace()
     146             :  * to agree.
     147             :  */
     148             : 
     149             : space               [ \t\n\r\f\v]
     150             : non_newline_space   [ \t\f\v]
     151             : newline             [\n\r]
     152             : non_newline         [^\n\r]
     153             : 
     154             : comment         ("--"{non_newline}*)
     155             : 
     156             : whitespace      ({space}+|{comment})
     157             : 
     158             : /*
     159             :  * SQL requires at least one newline in the whitespace separating
     160             :  * string literals that are to be concatenated.  Silly, but who are we
     161             :  * to argue?  Note that {whitespace_with_newline} should not have * after
     162             :  * it, whereas {whitespace} should generally have a * after it...
     163             :  */
     164             : 
     165             : special_whitespace      ({space}+|{comment}{newline})
     166             : non_newline_whitespace  ({non_newline_space}|{comment})
     167             : whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
     168             : 
     169             : quote           '
     170             : /* If we see {quote} then {quotecontinue}, the quoted string continues */
     171             : quotecontinue   {whitespace_with_newline}{quote}
     172             : 
     173             : /*
     174             :  * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
     175             :  * {quotecontinue}.  It might seem that this could just be {whitespace}*,
     176             :  * but if there's a dash after {whitespace_with_newline}, it must be consumed
     177             :  * to see if there's another dash --- which would start a {comment} and thus
     178             :  * allow continuation of the {quotecontinue} token.
     179             :  */
     180             : quotecontinuefail   {whitespace}*"-"?
     181             : 
     182             : /* Bit string
     183             :  * It is tempting to scan the string for only those characters
     184             :  * which are allowed. However, this leads to silently swallowed
     185             :  * characters if illegal characters are included in the string.
     186             :  * For example, if xbinside is [01] then B'ABCD' is interpreted
     187             :  * as a zero-length string, and the ABCD' is lost!
     188             :  * Better to pass the string forward and let the input routines
     189             :  * validate the contents.
     190             :  */
     191             : xbstart         [bB]{quote}
     192             : xbinside        [^']*
     193             : 
     194             : /* Hexadecimal byte string */
     195             : xhstart         [xX]{quote}
     196             : xhinside        [^']*
     197             : 
     198             : /* National character */
     199             : xnstart         [nN]{quote}
     200             : 
     201             : /* Quoted string that allows backslash escapes */
     202             : xestart         [eE]{quote}
     203             : xeinside        [^\\']+
     204             : xeescape        [\\][^0-7]
     205             : xeoctesc        [\\][0-7]{1,3}
     206             : xehexesc        [\\]x[0-9A-Fa-f]{1,2}
     207             : xeunicode       [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
     208             : xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
     209             : 
     210             : /* Extended quote
     211             :  * xqdouble implements embedded quote, ''''
     212             :  */
     213             : xqstart         {quote}
     214             : xqdouble        {quote}{quote}
     215             : xqinside        [^']+
     216             : 
     217             : /* $foo$ style quotes ("dollar quoting")
     218             :  * The quoted string starts with $foo$ where "foo" is an optional string
     219             :  * in the form of an identifier, except that it may not contain "$",
     220             :  * and extends to the first occurrence of an identical string.
     221             :  * There is *no* processing of the quoted text.
     222             :  *
     223             :  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
     224             :  * fails to match its trailing "$".
     225             :  */
     226             : dolq_start      [A-Za-z\200-\377_]
     227             : dolq_cont       [A-Za-z\200-\377_0-9]
     228             : dolqdelim       \$({dolq_start}{dolq_cont}*)?\$
     229             : dolqfailed      \${dolq_start}{dolq_cont}*
     230             : dolqinside      [^$]+
     231             : 
     232             : /* Double quote
     233             :  * Allows embedded spaces and other special characters into identifiers.
     234             :  */
     235             : dquote          \"
     236             : xdstart         {dquote}
     237             : xdstop          {dquote}
     238             : xddouble        {dquote}{dquote}
     239             : xdinside        [^"]+
     240             : 
     241             : /* Quoted identifier with Unicode escapes */
     242             : xuistart        [uU]&{dquote}
     243             : 
     244             : /* Quoted string with Unicode escapes */
     245             : xusstart        [uU]&{quote}
     246             : 
     247             : /* error rule to avoid backup */
     248             : xufailed        [uU]&
     249             : 
     250             : 
     251             : /* C-style comments
     252             :  *
     253             :  * The "extended comment" syntax closely resembles allowable operator syntax.
     254             :  * The tricky part here is to get lex to recognize a string starting with
     255             :  * slash-star as a comment, when interpreting it as an operator would produce
     256             :  * a longer match --- remember lex will prefer a longer match!  Also, if we
     257             :  * have something like plus-slash-star, lex will think this is a 3-character
     258             :  * operator whereas we want to see it as a + operator and a comment start.
     259             :  * The solution is two-fold:
     260             :  * 1. append {op_chars}* to xcstart so that it matches as much text as
     261             :  *    {operator} would. Then the tie-breaker (first matching rule of same
     262             :  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
     263             :  *    in case it contains a star-slash that should terminate the comment.
     264             :  * 2. In the operator rule, check for slash-star within the operator, and
     265             :  *    if found throw it back with yyless().  This handles the plus-slash-star
     266             :  *    problem.
     267             :  * Dash-dash comments have similar interactions with the operator rule.
     268             :  */
     269             : xcstart         \/\*{op_chars}*
     270             : xcstop          \*+\/
     271             : xcinside        [^*/]+
     272             : 
     273             : ident_start     [A-Za-z\200-\377_]
     274             : ident_cont      [A-Za-z\200-\377_0-9\$]
     275             : 
     276             : identifier      {ident_start}{ident_cont}*
     277             : 
     278             : /* Assorted special-case operators and operator-like tokens */
     279             : typecast        "::"
     280             : dot_dot         \.\.
     281             : colon_equals    ":="
     282             : 
     283             : /*
     284             :  * These operator-like tokens (unlike the above ones) also match the {operator}
     285             :  * rule, which means that they might be overridden by a longer match if they
     286             :  * are followed by a comment start or a + or - character. Accordingly, if you
     287             :  * add to this list, the backend lexer's {operator} block must also gain code
     288             :  * to return the correct token in such cases. (That is not needed in this
     289             :  * file, psqlscan.l, since the token value is ignored here.)
     290             :  */
     291             : equals_greater  "=>"
     292             : less_equals     "<="
     293             : greater_equals  ">="
     294             : less_greater    "<>"
     295             : not_equals      "!="
     296             : 
     297             : /*
     298             :  * "self" is the set of chars that should be returned as single-character
     299             :  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
     300             :  * which can be one or more characters long (but if a single-char token
     301             :  * appears in the "self" set, it is not to be returned as an Op).  Note
     302             :  * that the sets overlap, but each has some chars that are not in the other.
     303             :  *
     304             :  * If you change either set, adjust the character lists appearing in the
     305             :  * rule for "operator"!
     306             :  */
     307             : self            [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
     308             : op_chars        [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
     309             : operator        {op_chars}+
     310             : 
     311             : /*
     312             :  * Numbers
     313             :  *
     314             :  * Unary minus is not part of a number here.  Instead we pass it separately to
     315             :  * the parser, and there it gets coerced via doNegate().
     316             :  *
     317             :  * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
     318             :  *
     319             :  * {realfail} is added to prevent the need for scanner
     320             :  * backup when the {real} rule fails to match completely.
     321             :  */
     322             : decdigit        [0-9]
     323             : hexdigit        [0-9A-Fa-f]
     324             : octdigit        [0-7]
     325             : bindigit        [0-1]
     326             : 
     327             : decinteger      {decdigit}(_?{decdigit})*
     328             : hexinteger      0[xX](_?{hexdigit})+
     329             : octinteger      0[oO](_?{octdigit})+
     330             : bininteger      0[bB](_?{bindigit})+
     331             : 
     332             : hexfail         0[xX]_?
     333             : octfail         0[oO]_?
     334             : binfail         0[bB]_?
     335             : 
     336             : numeric         (({decinteger}\.{decinteger}?)|(\.{decinteger}))
     337             : numericfail     {decinteger}\.\.
     338             : 
     339             : real            ({decinteger}|{numeric})[Ee][-+]?{decinteger}
     340             : realfail        ({decinteger}|{numeric})[Ee][-+]
     341             : 
     342             : /* Positional parameters don't accept underscores. */
     343             : param           \${decdigit}+
     344             : 
     345             : /*
     346             :  * An identifier immediately following an integer literal is disallowed because
     347             :  * in some cases it's ambiguous what is meant: for example, 0x1234 could be
     348             :  * either a hexinteger or a decinteger "0" and an identifier "x1234".  We can
     349             :  * detect such problems by seeing if integer_junk matches a longer substring
     350             :  * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
     351             :  * bininteger).  One "junk" pattern is sufficient because
     352             :  * {decinteger}{identifier} will match all the same strings we'd match with
     353             :  * {hexinteger}{identifier} etc.
     354             :  *
     355             :  * Note that the rule for integer_junk must appear after the ones for
     356             :  * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
     357             :  * and integer_junk, and we need hexinteger to be chosen in that case.
     358             :  *
     359             :  * Also disallow strings matched by numeric_junk, real_junk and param_junk
     360             :  * for consistency.
     361             :  */
     362             : integer_junk    {decinteger}{identifier}
     363             : numeric_junk    {numeric}{identifier}
     364             : real_junk       {real}{identifier}
     365             : param_junk      \${decdigit}+{identifier}
     366             : 
     367             : /* psql-specific: characters allowed in variable names */
     368             : variable_char   [A-Za-z\200-\377_0-9]
     369             : 
     370             : other           .
     371             : 
     372             : /*
     373             :  * Dollar quoted strings are totally opaque, and no escaping is done on them.
     374             :  * Other quoted strings must allow some special characters such as single-quote
     375             :  *  and newline.
     376             :  * Embedded single-quotes are implemented both in the SQL standard
     377             :  *  style of two adjacent single quotes "''" and in the Postgres/Java style
     378             :  *  of escaped-quote "\'".
     379             :  * Other embedded escaped characters are matched explicitly and the leading
     380             :  *  backslash is dropped from the string.
     381             :  * Note that xcstart must appear before operator, as explained above!
     382             :  *  Also whitespace (comment) must appear before operator.
     383             :  */
     384             : 
     385             : %%
     386             : 
     387             : %{
     388             :         /* Declare some local variables inside yylex(), for convenience */
     389             :         PsqlScanState cur_state = yyextra;      /* also the state argument used by the ECHO macro */
     390             :         PQExpBuffer output_buf = cur_state->output_buf;     /* its length is tested by the whitespace rule */
     391             : 
     392             :         /*
     393             :          * Force flex into the state indicated by start_state.  This has a
     394             :          * couple of purposes: it lets some of the functions below set a new
     395             :          * starting state without ugly direct access to flex variables, and it
     396             :          * allows us to transition from one flex lexer to another so that we
     397             :          * can lex different parts of the source string using separate lexers.
     398             :          */
     399             :         BEGIN(cur_state->start_state);
     400             : %}
     401             : 
     402             : {whitespace}    {
     403             :                     /*
     404             :                      * Note that the whitespace rule includes both true
     405             :                      * whitespace and single-line ("--" style) comments.
     406             :                      * We suppress whitespace until we have collected some
     407             :                      * non-whitespace data.  (This interacts with some
     408             :                      * decisions in MainLoop(); see there for details.)
     409             :                      */
     410             :                     if (output_buf->len > 0)    /* echo only once the output buffer is non-empty */
     411             :                         ECHO;
     412             :                 }
     413             : 
     414             : {xcstart}       {
     415             :                     cur_state->xcdepth = 0;     /* outermost comment: nesting depth starts at zero */
     416             :                     BEGIN(xc);
     417             :                     /* Put back any characters past slash-star; see above */
     418             :                     yyless(2);
     419             :                     ECHO;                       /* after yyless(2) this emits just the slash-star */
     420             :                 }
     421             : 
     422             : <xc>{
     423             : {xcstart}       {
     424             :                     cur_state->xcdepth++;       /* comment opener inside a comment: one level deeper */
     425             :                     /* Put back any characters past slash-star; see above */
     426             :                     yyless(2);
     427             :                     ECHO;
     428             :                 }
     429             : 
     430             : {xcstop}        {
     431             :                     if (cur_state->xcdepth <= 0)    /* this terminator closes the outermost comment */
     432             :                         BEGIN(INITIAL);
     433             :                     else
     434             :                         cur_state->xcdepth--;       /* otherwise just leave one nesting level */
     435             :                     ECHO;
     436             :                 }
     437             : 
     438             : {xcinside}      {
     439             :                     ECHO;       /* run of characters other than star and slash */
     440             :                 }
     441             : 
     442             : {op_chars}      {
     443             :                     ECHO;       /* a single operator character that xcinside cannot match, e.g. a lone slash */
     444             :                 }
     445             : 
     446             : \*+             {
     447             :                     ECHO;       /* stars that do not end the comment; xcstop wins as the longer match when a slash follows */
     448             :                 }
     449             : } /* <xc> */
     450             : 
     451             : {xbstart}       {
     452             :                     BEGIN(xb);      /* enter the bit string literal state */
     453             :                     ECHO;
     454             :                 }
     455             : <xh>{xhinside}    |
     456             : <xb>{xbinside}    {
     457             :                     ECHO;           /* literal contents are echoed as-is; both states share this action */
     458             :                 }
     459             : 
     460             : {xhstart}       {
     461             :                     /* Hexadecimal bit type.
     462             :                      * NOTE(review): earlier wording here spoke of placing a
     463             :                      * leading "x" on the string for the input routine; that
     464             :                      * presumably describes the backend lexer's action.  This
     465             :                      * action only enters <xh> state and echoes the match.
     466             :                      */
     467             :                     BEGIN(xh);
     468             :                     ECHO;
     469             :                 }
     470             : 
     471             : {xnstart}       {
     472             :                     yyless(1);  /* eat only 'n' this time */
     473             :                     ECHO;       /* the quote that was put back is rescanned next, still in the current state */
     474             :                 }
     475             : 
     476             : {xqstart}       {
     477             :                     if (cur_state->std_strings)     /* flag picks <xq> (standard) over <xe> (backslash escapes) */
     478             :                         BEGIN(xq);
     479             :                     else
     480             :                         BEGIN(xe);
     481             :                     ECHO;
     482             :                 }
     483             : {xestart}       {
     484             :                     BEGIN(xe);      /* e/E prefix: always the backslash-escape string state */
     485             :                     ECHO;
     486             :                 }
     487             : {xusstart}      {
     488             :                     BEGIN(xus);     /* u& / U& prefix: quoted string with Unicode escapes */
     489             :                     ECHO;
     490             :                 }
     491             : 
     492             : <xb,xh,xq,xe,xus>{quote} {
     493             :                     /*
     494             :                      * When we are scanning a quoted string and see an end
     495             :                      * quote, we must look ahead for a possible continuation.
     496             :                      * If we don't see one, we know the end quote was in fact
     497             :                      * the end of the string.  To reduce the lexer table size,
     498             :                      * we use a single "xqs" state to do the lookahead for all
     499             :                      * types of strings.
     500             :                      */
     501             :                     cur_state->state_before_str_stop = YYSTATE;     /* remembered so a continuation can resume it */
     502             :                     BEGIN(xqs);
     503             :                     ECHO;
     504             :                 }
     505             : <xqs>{quotecontinue} {
     506             :                     /*
     507             :                      * Found a quote continuation, so return to the in-quote
     508             :                      * state and continue scanning the literal.  Nothing is
     509             :                      * added to the literal's contents.
     510             :                      */
     511             :                     BEGIN(cur_state->state_before_str_stop);
     512             :                     ECHO;
     513             :                 }
     514             : <xqs>{quotecontinuefail} |
     515             : <xqs>{other}  {
     516             :                     /*
     517             :                      * Failed to see a quote continuation.  Throw back
     518             :                      * everything after the end quote (yyless(0) rescans all of
     519             :                      * it) and resume ordinary scanning in INITIAL state.
     520             :                      */
     521             :                     yyless(0);
     522             :                     BEGIN(INITIAL);
     523             :                     /* There's nothing to echo ... */
     524             :                 }
     525             : 
     526             : <xq,xe,xus>{xqdouble} {
     527             :                     ECHO;       /* two adjacent quotes inside the literal */
     528             :                 }
     529             : <xq,xus>{xqinside}  {
     530             :                     ECHO;       /* run of characters containing no quote */
     531             :                 }
     532             : <xe>{xeinside}  {
     533             :                     ECHO;       /* run of characters containing neither backslash nor quote */
     534             :                 }
     535             : <xe>{xeunicode} {
     536             :                     ECHO;       /* backslash, then u with 4 hex digits or U with 8 hex digits */
     537             :                 }
     538             : <xe>{xeunicodefail}   {
     539             :                     ECHO;       /* same prefix with fewer hex digits than a full Unicode escape */
     540             :                 }
     541             : <xe>{xeescape}  {
     542             :                     ECHO;       /* backslash followed by any character other than 0-7 */
     543             :                 }
     544             : <xe>{xeoctesc}  {
     545             :                     ECHO;       /* backslash followed by one to three octal digits */
     546             :                 }
     547             : <xe>{xehexesc}  {
     548             :                     ECHO;       /* backslash, x, then one or two hex digits */
     549             :                 }
     550             : <xe>.         {
     551             :                     /* This is only needed for \ just before EOF */
     552             :                     ECHO;
     553             :                 }
     554             : 
     555             : {dolqdelim}     {
     556             :                     cur_state->dolqstart = pg_strdup(yytext);   /* keep the opening delimiter to compare with candidate closers */
     557             :                     BEGIN(xdolq);
     558             :                     ECHO;
     559             :                 }
     560             : {dolqfailed}    {
     561             :                     /* throw back all but the initial "$" */
     562             :                     yyless(1);
     563             :                     ECHO;       /* emits only the "$"; the remainder is rescanned */
     564             :                 }
     565             : <xdolq>{dolqdelim} {
     566             :                     if (strcmp(yytext, cur_state->dolqstart) == 0)  /* identical to the opener: the literal ends here */
     567             :                     {
     568             :                         free(cur_state->dolqstart);
     569             :                         cur_state->dolqstart = NULL;    /* nothing saved once the literal is closed */
     570             :                         BEGIN(INITIAL);
     571             :                     }
     572             :                     else
     573             :                     {
     574             :                         /*
     575             :                          * When we fail to match $...$ to dolqstart, transfer
     576             :                          * the $... part to the output, but put back the final
     577             :                          * $ for rescanning.  Consider $delim$...$junk$delim$
     578             :                          */
     579             :                         yyless(yyleng - 1);
     580             :                     }
     581             :                     ECHO;       /* the whole closing delimiter, or the mismatched text minus its final $ */
     582             :                 }
     583             : <xdolq>{dolqinside} {
     584             :                     ECHO;       /* run of characters containing no $ */
     585             :                 }
     586             : <xdolq>{dolqfailed} {
     587             :                     ECHO;       /* $ plus identifier characters without a closing $: ordinary text here */
     588             :                 }
     589             : <xdolq>.      {
     590             :                     /* This is only needed for $ inside the quoted text */
     591             :                     ECHO;
     592             :                 }
     593             : 
     594             : {xdstart}       {
     595             :                     BEGIN(xd);   /* left again by the <xd>{xdstop} rule below */
     596             :                     ECHO;
     597             :                 }
     598             : {xuistart}      {
     599             :                     BEGIN(xui);   /* left again by the <xui>{dquote} rule below */
     600             :                     ECHO;
     601             :                 }
     602             : <xd>{xdstop}  {
     603             :                     BEGIN(INITIAL);
     604             :                     ECHO;
     605             :                 }
     606             : <xui>{dquote} {
     607             :                     BEGIN(INITIAL);
     608             :                     ECHO;
     609             :                 }
     610             : <xd,xui>{xddouble}    {
     611             :                     ECHO;   /* no BEGIN: remains in xd or xui */
     612             :                 }
     613             : <xd,xui>{xdinside}    {
     614             :                     ECHO;
     615             :                 }
     616             : 
     617             : {xufailed}  {
     618             :                     /* throw back all but the initial u/U */
     619             :                     yyless(1);
     620             :                     ECHO;
     621             :                 }
     622             : 
     623             : {typecast}      {
     624             :                     ECHO;   /* this rule and the seven after it do nothing except echo the match */
     625             :                 }
     626             : 
     627             : {dot_dot}       {
     628             :                     ECHO;
     629             :                 }
     630             : 
     631             : {colon_equals}  {
     632             :                     ECHO;
     633             :                 }
     634             : 
     635             : {equals_greater} {
     636             :                     ECHO;
     637             :                 }
     638             : 
     639             : {less_equals}   {
     640             :                     ECHO;
     641             :                 }
     642             : 
     643             : {greater_equals} {
     644             :                     ECHO;
     645             :                 }
     646             : 
     647             : {less_greater}  {
     648             :                     ECHO;
     649             :                 }
     650             : 
     651             : {not_equals}    {
     652             :                     ECHO;
     653             :                 }
     654             : 
     655             :     /*
     656             :      * These rules are specific to psql --- they implement parenthesis
     657             :      * counting and detection of command-ending semicolon.  These must
     658             :      * appear before the {self} rule so that they take precedence over it.
     659             :      */
     660             : 
     661             : "("               {
     662             :                     cur_state->paren_depth++;
     663             :                     ECHO;
     664             :                 }
     665             : 
     666             : ")"               {
     667             :                     if (cur_state->paren_depth > 0)   /* an unmatched close paren never drives the depth negative */
     668             :                         cur_state->paren_depth--;
     669             :                     ECHO;
     670             :                 }
     671             : 
     672             : ";"               {
     673             :                     ECHO;   /* the semicolon is echoed whether or not it ends the command */
     674             :                     if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)   /* not inside parentheses and not inside a tracked BEGIN block */
     675             :                     {
     676             :                         /* Terminate lexing temporarily */
     677             :                         cur_state->start_state = YY_START;   /* record the current flex start condition in the state struct */
     678             :                         cur_state->identifier_count = 0;   /* the {identifier} rule clears its history when this is zero */
     679             :                         return LEXRES_SEMI;
     680             :                     }
     681             :                 }
     682             : 
     683             :     /*
     684             :      * psql-specific rules to handle backslash commands and variable
     685             :      * substitution.  We want these before {self}, also.
     686             :      */
     687             : 
     688             : "\\"[;:]      {
     689             :                     /* Force a semicolon or colon into the query buffer */
     690             :                     psqlscan_emit(cur_state, yytext + 1, 1);   /* emits only the one character after the backslash */
     691             :                     if (yytext[1] == ';')
     692             :                         cur_state->identifier_count = 0;   /* same reset as the plain ";" rule performs when it ends a command */
     693             :                 }
     694             : 
     695             : "\\"          {
     696             :                     /* Terminate lexing temporarily */
     697             :                     cur_state->start_state = YY_START;   /* nothing is echoed for the backslash itself */
     698             :                     return LEXRES_BACKSLASH;
     699             :                 }
     700             : 
     701             : :{variable_char}+   {
     702             :                     /* Possible psql variable substitution */
     703             :                     char       *varname;
     704             :                     char       *value;
     705             : 
     706             :                     varname = psqlscan_extract_substring(cur_state,   /* the match without its leading colon */
     707             :                                                          yytext + 1,
     708             :                                                          yyleng - 1);
     709             :                     if (cur_state->callbacks->get_variable)   /* the callback is optional; without it nothing is substituted */
     710             :                         value = cur_state->callbacks->get_variable(varname,
     711             :                                                                    PQUOTE_PLAIN,
     712             :                                                                    cur_state->cb_passthrough);
     713             :                     else
     714             :                         value = NULL;
     715             : 
     716             :                     if (value)
     717             :                     {
     718             :                         /* It is a variable, check for recursion */
     719             :                         if (psqlscan_var_is_current_source(cur_state, varname))
     720             :                         {
     721             :                             /* Recursive expansion --- don't go there */
     722             :                             pg_log_warning("skipping recursive expansion of variable \"%s\"",
     723             :                                                               varname);
     724             :                             /* Instead copy the string as is */
     725             :                             ECHO;
     726             :                         }
     727             :                         else
     728             :                         {
     729             :                             /* OK, perform substitution */
     730             :                             psqlscan_push_new_buffer(cur_state, value, varname);
     731             :                             /* yy_scan_string already made buffer active */
     732             :                         }
     733             :                         free(value);   /* freed here on both the recursive and the substituted path */
     734             :                     }
     735             :                     else
     736             :                     {
     737             :                         /*
     738             :                          * if the variable doesn't exist we'll copy the string
     739             :                          * as is
     740             :                          */
     741             :                         ECHO;
     742             :                     }
     743             : 
     744             :                     free(varname);   /* released on every path through the rule */
     745             :                 }
     746             : 
     747             : :'{variable_char}+' {
     748             :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     749             :                                              PQUOTE_SQL_LITERAL);
     750             :                 }
     751             : 
     752             : :\"{variable_char}+\" {
     753             :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     754             :                                              PQUOTE_SQL_IDENT);
     755             :                 }
     756             : 
     757             : :\{\?{variable_char}+\} {
     758             :                     psqlscan_test_variable(cur_state, yytext, yyleng);
     759             :                 }
     760             : 
     761             :     /*
     762             :      * These rules just avoid the need for scanner backup if one of the
     763             :      * three rules above fails to match completely.  Each keeps only the leading colon via yyless(1) and echoes it; the rest is rescanned.
     764             :      */
     765             : 
     766             : :'{variable_char}*  {
     767             :                     /* Throw back everything but the colon */
     768             :                     yyless(1);
     769             :                     ECHO;
     770             :                 }
     771             : 
     772             : :\"{variable_char}*    {
     773             :                     /* Throw back everything but the colon */
     774             :                     yyless(1);
     775             :                     ECHO;
     776             :                 }
     777             : 
     778             : :\{\?{variable_char}*   {
     779             :                     /* Throw back everything but the colon */
     780             :                     yyless(1);
     781             :                     ECHO;
     782             :                 }
     783             : :\{ {
     784             :                     /* Throw back everything but the colon */
     785             :                     yyless(1);
     786             :                     ECHO;
     787             :                 }
     788             : 
     789             :     /*
     790             :      * Back to backend-compatible rules.
     791             :      */
     792             : 
     793             : {self}          {
     794             :                     ECHO;
     795             :                 }
     796             : 
     797             : {operator}      {
     798             :                     /*
     799             :                      * Check for embedded slash-star or dash-dash; those
     800             :                      * are comment starts, so operator must stop there.
     801             :                      * Note that slash-star or dash-dash at the first
     802             :                      * character will match a prior rule, not this one.
     803             :                      */
     804             :                     int         nchars = yyleng;   /* how many leading chars of the match are kept as the operator */
     805             :                     char       *slashstar = strstr(yytext, "/*");
     806             :                     char       *dashdash = strstr(yytext, "--");
     807             : 
     808             :                     if (slashstar && dashdash)
     809             :                     {
     810             :                         /* if both appear, take the first one */
     811             :                         if (slashstar > dashdash)
     812             :                             slashstar = dashdash;
     813             :                     }
     814             :                     else if (!slashstar)
     815             :                         slashstar = dashdash;
     816             :                     if (slashstar)   /* slashstar now points at the earliest comment start, or is NULL */
     817             :                         nchars = slashstar - yytext;
     818             : 
     819             :                     /*
     820             :                      * For SQL compatibility, '+' and '-' cannot be the
     821             :                      * last char of a multi-char operator unless the operator
     822             :                      * contains chars that are not in SQL operators.
     823             :                      * The idea is to lex '=-' as two operators, but not
     824             :                      * to forbid operator names like '?-' that could not be
     825             :                      * sequences of SQL operators.
     826             :                      */
     827             :                     if (nchars > 1 &&
     828             :                         (yytext[nchars - 1] == '+' ||
     829             :                          yytext[nchars - 1] == '-'))
     830             :                     {
     831             :                         int         ic;
     832             : 
     833             :                         for (ic = nchars - 2; ic >= 0; ic--)   /* search backwards through the chars before the trailing sign */
     834             :                         {
     835             :                             char c = yytext[ic];
     836             :                             if (c == '~' || c == '!' || c == '@' ||
     837             :                                 c == '#' || c == '^' || c == '&' ||
     838             :                                 c == '|' || c == '`' || c == '?' ||
     839             :                                 c == '%')
     840             :                                 break;
     841             :                         }
     842             :                         if (ic < 0)
     843             :                         {
     844             :                             /*
     845             :                              * didn't find a qualifying character, so remove
     846             :                              * all trailing [+-]
     847             :                              */
     848             :                             do {
     849             :                                 nchars--;
     850             :                             } while (nchars > 1 &&   /* stops at one char, so the operator is never emptied */
     851             :                                  (yytext[nchars - 1] == '+' ||
     852             :                                   yytext[nchars - 1] == '-'));
     853             :                         }
     854             :                     }
     855             : 
     856             :                     if (nchars < yyleng)
     857             :                     {
     858             :                         /* Strip the unwanted chars from the token */
     859             :                         yyless(nchars);
     860             :                     }
     861             :                     ECHO;   /* echoes the token as shortened by yyless(), if it was shortened */
     862             :                 }
     863             : 
     864             : {param}         {
     865             :                     ECHO;   /* every rule from here through {real_junk} only echoes; {numericfail} alone trims its match first */
     866             :                 }
     867             : {param_junk}    {
     868             :                     ECHO;
     869             :                 }
     870             : 
     871             : {decinteger}    {
     872             :                     ECHO;
     873             :                 }
     874             : {hexinteger}    {
     875             :                     ECHO;
     876             :                 }
     877             : {octinteger}    {
     878             :                     ECHO;
     879             :                 }
     880             : {bininteger}    {
     881             :                     ECHO;
     882             :                 }
     883             : {hexfail}       {
     884             :                     ECHO;
     885             :                 }
     886             : {octfail}       {
     887             :                     ECHO;
     888             :                 }
     889             : {binfail}       {
     890             :                     ECHO;
     891             :                 }
     892             : {numeric}       {
     893             :                     ECHO;
     894             :                 }
     895             : {numericfail}   {
     896             :                     /* throw back the .., and treat as integer */
     897             :                     yyless(yyleng - 2);
     898             :                     ECHO;
     899             :                 }
     900             : {real}          {
     901             :                     ECHO;
     902             :                 }
     903             : {realfail}      {
     904             :                     ECHO;
     905             :                 }
     906             : {integer_junk}  {
     907             :                     ECHO;
     908             :                 }
     909             : {numeric_junk}  {
     910             :                     ECHO;
     911             :                 }
     912             : {real_junk}     {
     913             :                     ECHO;
     914             :                 }
     915             : 
     916             : 
     917             : {identifier}    {
     918             :                     /*
     919             :                      * We need to track if we are inside a BEGIN .. END block
     920             :                      * in a function definition, so that semicolons contained
     921             :                      * therein don't terminate the whole statement.  Short of
     922             :                      * writing a full parser here, the following heuristic
     923             :                      * should work.  First, we track whether the beginning of
     924             :                      * the statement matches CREATE [OR REPLACE]
     925             :                      * {FUNCTION|PROCEDURE}
     926             :                      */
     927             : 
     928             :                     if (cur_state->identifier_count == 0)   /* first identifier since the counter was reset: clear the recorded initials */
     929             :                         memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
     930             : 
     931             :                     if (pg_strcasecmp(yytext, "create") == 0 ||
     932             :                         pg_strcasecmp(yytext, "function") == 0 ||
     933             :                         pg_strcasecmp(yytext, "procedure") == 0 ||
     934             :                         pg_strcasecmp(yytext, "or") == 0 ||
     935             :                         pg_strcasecmp(yytext, "replace") == 0)
     936             :                     {
     937             :                         if (cur_state->identifier_count < sizeof(cur_state->identifiers))   /* NOTE(review): compared against a size_t; assumes the counter is never negative -- confirm its declared type */
     938             :                             cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);   /* only the lower-cased first letter is stored, at this identifier's position */
     939             :                     }
     940             : 
     941             :                     cur_state->identifier_count++;   /* incremented for every identifier, whether or not it was one of the keywords above */
     942             : 
     943             :                     if (cur_state->identifiers[0] == 'c' &&   /* accepted initial sequences at positions 0..3: c f, c p, c o r f, c o r p */
     944             :                         (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
     945             :                          (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
     946             :                           (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
     947             :                         cur_state->paren_depth == 0)   /* keywords inside parentheses are not counted */
     948             :                     {
     949             :                         if (pg_strcasecmp(yytext, "begin") == 0)
     950             :                             cur_state->begin_depth++;
     951             :                         else if (pg_strcasecmp(yytext, "case") == 0)
     952             :                         {
     953             :                             /*
     954             :                              * CASE also ends with END.  We only need to track
     955             :                              * this if we are already inside a BEGIN.
     956             :                              */
     957             :                             if (cur_state->begin_depth >= 1)
     958             :                                 cur_state->begin_depth++;
     959             :                         }
     960             :                         else if (pg_strcasecmp(yytext, "end") == 0)
     961             :                         {
     962             :                             if (cur_state->begin_depth > 0)   /* an unmatched END never drives the depth negative */
     963             :                                 cur_state->begin_depth--;
     964             :                         }
     965             :                     }
     966             : 
     967             :                     ECHO;   /* the identifier itself is always echoed unchanged */
     968             :                 }
     969             : 
     970             : {other}         {
     971             :                     ECHO;
     972             :                 }
     973             : 
     974             : <<EOF>>         {
     975             :                     if (cur_state->buffer_stack == NULL)   /* no variable-expansion buffer is pending, so the input itself is exhausted */
     976             :                     {
     977             :                         cur_state->start_state = YY_START;   /* record the current flex start condition in the state struct */
     978             :                         return LEXRES_EOL;      /* end of input reached */
     979             :                     }
     980             : 
     981             :                     /*
     982             :                      * We were expanding a variable, so pop the inclusion
     983             :                      * stack and keep lexing
     984             :                      */
     985             :                     psqlscan_pop_buffer_stack(cur_state);
     986             :                     psqlscan_select_top_buffer(cur_state);   /* no return here: the action ends and scanning continues */
     987             :                 }
     988             : 
     989             : %%
     990             : 
     991             : /* LCOV_EXCL_STOP */
     992             : 
     993             : /*
     994             :  * Create a lexer working state struct.
     995             :  *
     996             :  * callbacks is a struct of function pointers that encapsulate some
     997             :  * behavior we need from the surrounding program.  This struct must
     998             :  * remain valid for the lifespan of the PsqlScanState.  Only the pointer is stored, not a copy; the new state is returned.
     999             :  */
    1000             : PsqlScanState
    1001             : psql_scan_create(const PsqlScanCallbacks *callbacks)
    1002       18728 : {
    1003             :     PsqlScanState state;
    1004             : 
    1005             :     state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
    1006       18728 : 
    1007             :     state->callbacks = callbacks;
    1008       18728 : 
    1009             :     yylex_init(&state->scanner);   /* NOTE(review): the return value of yylex_init() is discarded here */
    1010       18728 : 
    1011             :     yyset_extra(state, state->scanner);   /* attach the state to the scanner as its extra data */
    1012       18728 : 
    1013             :     psql_scan_reset(state);
    1014       18728 : 
    1015             :     return state;
    1016       18728 : }
    1017             : 
    1018             : /*
    1019             :  * Destroy a lexer working state struct, releasing all resources.  The struct itself is freed, so state is invalid afterwards.
    1020             :  */
    1021             : void
    1022             : psql_scan_destroy(PsqlScanState state)
    1023       18626 : {
    1024             :     psql_scan_finish(state);
    1025       18626 : 
    1026             :     psql_scan_reset(state);
    1027       18626 : 
    1028             :     yylex_destroy(state->scanner);   /* the scanner is torn down before the struct that holds its handle */
    1029       18626 : 
    1030             :     free(state);
    1031       18626 : }
    1033             : /*
    1034             :  * Set the callback passthrough pointer for the lexer.
    1035             :  *
    1036             :  * This could have been integrated into psql_scan_create, but keeping it
    1037             :  * separate allows the application to change the pointer later, which might
    1038             :  * be useful.  The lexer rules hand this pointer to the get_variable callback.
    1039             :  */
    1040             : void
    1041             : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
    1042       18166 : {
    1043             :     state->cb_passthrough = passthrough;   /* stored as given; nothing is copied */
    1044       18166 : }
    1046             : /*
    1047             :  * Set up to perform lexing of the given input line.
    1048             :  *
    1049             :  * The text at *line, extending for line_len bytes, will be scanned by
    1050             :  * subsequent calls to the psql_scan routines.  psql_scan_finish should
    1051             :  * be called when scanning is complete.  Note that the lexer retains
    1052             :  * a pointer to the storage at *line --- this string must not be altered
    1053             :  * or freed until after psql_scan_finish is called.
    1054             :  *
    1055             :  * encoding is the libpq identifier for the character encoding in use,
    1056             :  * and std_strings says whether standard_conforming_strings is on.
    1057             :  */
    1058             : void
    1059             : psql_scan_setup(PsqlScanState state,
    1060      634666 :                 const char *line, int line_len,
    1061             :                 int encoding, bool std_strings)
    1062             : {
    1063             :     /* Mustn't be scanning already */
    1064             :     Assert(state->scanbufhandle == NULL);
    1065             :     Assert(state->buffer_stack == NULL);
    1066             : 
    1067             :     /* Do we need to hack the character set encoding? */
    1068             :     state->encoding = encoding;
    1069      634666 :     state->safe_encoding = pg_valid_server_encoding_id(encoding);
    1070      634666 : 
    1071             :     /* Save standard-strings flag as well */
    1072             :     state->std_strings = std_strings;
    1073      634666 : 
    1074             :     /* Set up flex input buffer with appropriate translation and padding */
    1075             :     state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
    1076      634666 :                                                    &state->scanbuf);
    1077             :     state->scanline = line;   /* the caller's pointer is kept, not a copy */
    1078      634666 : 
    1079             :     /* Set lookaside data in case we have to map unsafe encoding */
    1080             :     state->curline = state->scanbuf;
    1081      634666 :     state->refline = state->scanline;
    1082      634666 : }
    1084             : /*
    1085             :  * Do lexical analysis of SQL command text.
    1086             :  *
    1087             :  * The text previously passed to psql_scan_setup is scanned, and appended
    1088             :  * (possibly with transformation) to query_buf.
    1089             :  *
    1090             :  * The return value indicates the condition that stopped scanning:
    1091             :  *
    1092             :  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
    1093             :  * transferred to query_buf.)  The command accumulated in query_buf should
    1094             :  * be executed, then clear query_buf and call again to scan the remainder
    1095             :  * of the line.
    1096             :  *
    1097             :  * PSCAN_BACKSLASH: found a backslash that starts a special command.
    1098             :  * Any previous data on the line has been transferred to query_buf.
    1099             :  * The caller will typically next apply a separate flex lexer to scan
    1100             :  * the special command.
    1101             :  *
    1102             :  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
    1103             :  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
    1104             :  *
    1105             :  * PSCAN_EOL: the end of the line was reached, and there is no lexical
    1106             :  * reason to consider the command incomplete.  The caller may or may not
    1107             :  * choose to send it.  *prompt is set to the appropriate prompt type if
    1108             :  * the caller chooses to collect more input.
    1109             :  *
    1110             :  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
    1111             :  * be called next, then the cycle may be repeated with a fresh input line.
    1112             :  *
    1113             :  * In all cases, *prompt is set to an appropriate prompt type code for the
    1114             :  * next line-input operation.
    1115             :  */
    1116             : PsqlScanResult
    1117             : psql_scan(PsqlScanState state,
    1118      969102 :           PQExpBuffer query_buf,
    1119             :           promptStatus_t *prompt)
    1120             : {
    1121             :     PsqlScanResult result;
    1122             :     int         lexresult;
    1123             : 
    1124             :     /* Must be scanning already */
    1125             :     Assert(state->scanbufhandle != NULL);
    1126             : 
    1127             :     /* Set current output target */
    1128             :     state->output_buf = query_buf;
    1129      969102 : 
    1130             :     /* Set input source */
    1131             :     if (state->buffer_stack != NULL)
    1132      969102 :         yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
    1133          90 :     else
    1134             :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1135      969012 : 
    1136             :     /* And lex. */
    1137             :     lexresult = yylex(NULL, state->scanner);
    1138      969102 : 
    1139             :     /*
    1140             :      * Check termination state and return appropriate result info.
    1141             :      */
    1142             :     switch (lexresult)
    1143      969102 :     {
    1144             :         case LEXRES_EOL:        /* end of input */
    1145      634408 :             switch (state->start_state)
    1146      634408 :             {
    1147             :                 case INITIAL:
    1148      587434 :                 case xqs:       /* we treat this like INITIAL */
    1149             :                     if (state->paren_depth > 0)
    1150      587434 :                     {
    1151             :                         result = PSCAN_INCOMPLETE;
    1152       48256 :                         *prompt = PROMPT_PAREN;
    1153       48256 :                     }
    1154             :                     else if (state->begin_depth > 0)
    1155      539178 :                     {
    1156             :                         result = PSCAN_INCOMPLETE;
    1157         518 :                         *prompt = PROMPT_CONTINUE;
    1158         518 :                     }
    1159             :                     else if (query_buf->len > 0)
    1160      538660 :                     {
    1161             :                         result = PSCAN_EOL;
    1162      108264 :                         *prompt = PROMPT_CONTINUE;
    1163      108264 :                     }
    1164             :                     else
    1165             :                     {
    1166             :                         /* never bother to send an empty buffer */
    1167             :                         result = PSCAN_INCOMPLETE;
    1168      430396 :                         *prompt = PROMPT_READY;
    1169      430396 :                     }
    1170             :                     break;
    1171      587434 :                 case xb:
    1172           0 :                     result = PSCAN_INCOMPLETE;
    1173           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1174           0 :                     break;
    1175           0 :                 case xc:
    1176         794 :                     result = PSCAN_INCOMPLETE;
    1177         794 :                     *prompt = PROMPT_COMMENT;
    1178         794 :                     break;
    1179         794 :                 case xd:
    1180          26 :                     result = PSCAN_INCOMPLETE;
    1181          26 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1182          26 :                     break;
    1183          26 :                 case xh:
    1184           0 :                     result = PSCAN_INCOMPLETE;
    1185           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1186           0 :                     break;
    1187           0 :                 case xe:
    1188         602 :                     result = PSCAN_INCOMPLETE;
    1189         602 :                     *prompt = PROMPT_SINGLEQUOTE;
    1190         602 :                     break;
    1191         602 :                 case xq:
    1192        9502 :                     result = PSCAN_INCOMPLETE;
    1193        9502 :                     *prompt = PROMPT_SINGLEQUOTE;
    1194        9502 :                     break;
    1195        9502 :                 case xdolq:
    1196       36050 :                     result = PSCAN_INCOMPLETE;
    1197       36050 :                     *prompt = PROMPT_DOLLARQUOTE;
    1198       36050 :                     break;
    1199       36050 :                 case xui:
    1200           0 :                     result = PSCAN_INCOMPLETE;
    1201           0 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1202           0 :                     break;
    1203           0 :                 case xus:
    1204           0 :                     result = PSCAN_INCOMPLETE;
    1205           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1206           0 :                     break;
    1207           0 :                 default:
    1208           0 :                     /* can't get here */
    1209             :                     fprintf(stderr, "invalid YY_START\n");
    1210           0 :                     exit(1);
    1211           0 :             }
    1212             :             break;
    1213      634408 :         case LEXRES_SEMI:       /* semicolon */
    1214      319628 :             result = PSCAN_SEMICOLON;
    1215      319628 :             *prompt = PROMPT_READY;
    1216      319628 :             break;
    1217      319628 :         case LEXRES_BACKSLASH:  /* backslash */
    1218       15066 :             result = PSCAN_BACKSLASH;
    1219       15066 :             *prompt = PROMPT_READY;
    1220       15066 :             break;
    1221       15066 :         default:
    1222           0 :             /* can't get here */
    1223             :             fprintf(stderr, "invalid yylex result\n");
    1224           0 :             exit(1);
    1225           0 :     }
    1226             : 
    1227             :     return result;
    1228      969102 : }
    1229             : 
    1230             : /*
    1231             :  * Clean up after scanning a string.  This flushes any unread input and
    1232             :  * releases resources (but not the PsqlScanState itself).  Note however
    1233             :  * that this does not reset the lexer scan state; that can be done by
    1234             :  * psql_scan_reset(), which is an orthogonal operation.
    1235             :  *
    1236             :  * It is legal to call this when not scanning anything (makes it easier
    1237             :  * to deal with error recovery).
    1238             :  */
    1239             : void
    1240             : psql_scan_finish(PsqlScanState state)
    1241      653188 : {
    1242             :     /* Drop any incomplete variable expansions. */
    1243             :     while (state->buffer_stack != NULL)
    1244      653188 :         psqlscan_pop_buffer_stack(state);
    1245           0 : 
    1246             :     /* Done with the outer scan buffer, too */
    1247             :     if (state->scanbufhandle)
    1248      653188 :         yy_delete_buffer(state->scanbufhandle, state->scanner);
    1249      634566 :     state->scanbufhandle = NULL;
    1250      653188 :     if (state->scanbuf)
    1251      653188 :         free(state->scanbuf);
    1252      634566 :     state->scanbuf = NULL;
    1253      653188 : }
    1254      653188 : 
    1255             : /*
    1256             :  * Reset lexer scanning state to start conditions.  This is appropriate
    1257             :  * for executing \r psql commands (or any other time that we discard the
    1258             :  * prior contents of query_buf).  It is not, however, necessary to do this
    1259             :  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
    1260             :  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
    1261             :  * conditions are returned.
    1262             :  *
    1263             :  * Note that this is unrelated to flushing unread input; that task is
    1264             :  * done by psql_scan_finish().
    1265             :  */
    1266             : void
    1267             : psql_scan_reset(PsqlScanState state)
    1268       38742 : {
    1269             :     state->start_state = INITIAL;
    1270       38742 :     state->paren_depth = 0;
    1271       38742 :     state->xcdepth = 0;          /* not really necessary */
    1272       38742 :     if (state->dolqstart)
    1273       38742 :         free(state->dolqstart);
    1274           0 :     state->dolqstart = NULL;
    1275       38742 :     state->identifier_count = 0;
    1276       38742 :     state->begin_depth = 0;
    1277       38742 : }
    1278       38742 : 
    1279             : /*
    1280             :  * Reselect this lexer (psqlscan.l) after using another one.
    1281             :  *
    1282             :  * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
    1283             :  * state, because we'd never switch to another lexer in a different state.
    1284             :  * However, we don't want to reset e.g. paren_depth, so this can't be
    1285             :  * the same as psql_scan_reset().
    1286             :  *
    1287             :  * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
    1288             :  * must be a superset of this.
    1289             :  *
    1290             :  * Note: it seems likely that other lexers could just assign INITIAL for
    1291             :  * themselves, since that probably has the value zero in every flex-generated
    1292             :  * lexer.  But let's not assume that.
    1293             :  */
    1294             : void
    1295             : psql_scan_reselect_sql_lexer(PsqlScanState state)
    1296       67388 : {
    1297             :     state->start_state = INITIAL;
    1298       67388 : }
    1299       67388 : 
    1300             : /*
    1301             :  * Return true if lexer is currently in an "inside quotes" state.
    1302             :  *
    1303             :  * This is pretty grotty but is needed to preserve the old behavior
    1304             :  * that mainloop.c drops blank lines not inside quotes without even
    1305             :  * echoing them.
    1306             :  */
    1307             : bool
    1308             : psql_scan_in_quote(PsqlScanState state)
    1309      131768 : {
    1310             :     return state->start_state != INITIAL &&
    1311      132736 :         state->start_state != xqs;
    1312         968 : }
    1313             : 
    1314             : /*
    1315             :  * Push the given string onto the stack of stuff to scan.
    1316             :  *
    1317             :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1318             :  */
    1319             : void
    1320             : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
    1321        1280 :                          const char *varname)
    1322             : {
    1323             :     StackElem  *stackelem;
    1324             : 
    1325             :     stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
    1326        1280 : 
    1327             :     /*
    1328             :      * In current usage, the passed varname points at the current flex input
    1329             :      * buffer; we must copy it before calling psqlscan_prepare_buffer()
    1330             :      * because that will change the buffer state.
    1331             :      */
    1332             :     stackelem->varname = varname ? pg_strdup(varname) : NULL;
    1333        1280 : 
    1334             :     stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
    1335        1280 :                                              &stackelem->bufstring);
    1336             :     state->curline = stackelem->bufstring;
    1337        1280 :     if (state->safe_encoding)
    1338        1280 :     {
    1339             :         stackelem->origstring = NULL;
    1340        1280 :         state->refline = stackelem->bufstring;
    1341        1280 :     }
    1342             :     else
    1343             :     {
    1344             :         stackelem->origstring = pg_strdup(newstr);
    1345           0 :         state->refline = stackelem->origstring;
    1346           0 :     }
    1347             :     stackelem->next = state->buffer_stack;
    1348        1280 :     state->buffer_stack = stackelem;
    1349        1280 : }
    1350        1280 : 
    1351             : /*
    1352             :  * Pop the topmost buffer stack item (there must be one!)
    1353             :  *
    1354             :  * NB: after this, the flex input state is unspecified; caller must
    1355             :  * switch to an appropriate buffer to continue lexing.
    1356             :  * See psqlscan_select_top_buffer().
    1357             :  */
    1358             : void
    1359             : psqlscan_pop_buffer_stack(PsqlScanState state)
    1360        1280 : {
    1361             :     StackElem  *stackelem = state->buffer_stack;
    1362        1280 : 
    1363             :     state->buffer_stack = stackelem->next;
    1364        1280 :     yy_delete_buffer(stackelem->buf, state->scanner);
    1365        1280 :     free(stackelem->bufstring);
    1366        1280 :     if (stackelem->origstring)
    1367        1280 :         free(stackelem->origstring);
    1368           0 :     if (stackelem->varname)
    1369        1280 :         free(stackelem->varname);
    1370        1280 :     free(stackelem);
    1371        1280 : }
    1372        1280 : 
    1373             : /*
    1374             :  * Select the topmost surviving buffer as the active input.
    1375             :  */
    1376             : void
    1377             : psqlscan_select_top_buffer(PsqlScanState state)
    1378        1280 : {
    1379             :     StackElem  *stackelem = state->buffer_stack;
    1380        1280 : 
    1381             :     if (stackelem != NULL)
    1382        1280 :     {
    1383             :         yy_switch_to_buffer(stackelem->buf, state->scanner);
    1384           0 :         state->curline = stackelem->bufstring;
    1385           0 :         state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
    1386           0 :     }
    1387             :     else
    1388             :     {
    1389             :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1390        1280 :         state->curline = state->scanbuf;
    1391        1280 :         state->refline = state->scanline;
    1392        1280 :     }
    1393             : }
    1394        1280 : 
    1395             : /*
    1396             :  * Check if specified variable name is the source for any string
    1397             :  * currently being scanned
    1398             :  */
    1399             : bool
    1400             : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
    1401        1280 : {
    1402             :     StackElem  *stackelem;
    1403             : 
    1404             :     for (stackelem = state->buffer_stack;
    1405        1280 :          stackelem != NULL;
    1406             :          stackelem = stackelem->next)
    1407           0 :     {
    1408             :         if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
    1409           0 :             return true;
    1410           0 :     }
    1411             :     return false;
    1412        1280 : }
    1413             : 
    1414             : /*
    1415             :  * Set up a flex input buffer to scan the given data.  We always make a
    1416             :  * copy of the data.  If working in an unsafe encoding, the copy has
    1417             :  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
    1418             :  *
    1419             :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1420             :  */
    1421             : YY_BUFFER_STATE
    1422             : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
    1423      635946 :                         char **txtcopy)
    1424             : {
    1425             :     char       *newtxt;
    1426             : 
    1427             :     /* Flex wants two \0 characters after the actual data */
    1428             :     newtxt = pg_malloc(len + 2);
    1429      635946 :     *txtcopy = newtxt;
    1430      635946 :     newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
    1431      635946 : 
    1432             :     if (state->safe_encoding)
    1433      635946 :         memcpy(newtxt, txt, len);
    1434      635946 :     else
    1435             :     {
    1436             :         /* Gotta do it the hard way */
    1437             :         int         i = 0;
    1438           0 : 
    1439             :         while (i < len)
    1440           0 :         {
    1441             :             int         thislen = PQmblen(txt + i, state->encoding);
    1442           0 : 
    1443             :             /* first byte should always be okay... */
    1444             :             newtxt[i] = txt[i];
    1445           0 :             i++;
    1446           0 :             while (--thislen > 0 && i < len)
    1447           0 :                 newtxt[i++] = (char) 0xFF;
    1448           0 :         }
    1449             :     }
    1450             : 
    1451             :     return yy_scan_buffer(newtxt, len + 2, state->scanner);
    1452      635946 : }
    1453             : 
    1454             : /*
    1455             :  * psqlscan_emit() --- body for ECHO macro
    1456             :  *
    1457             :  * NB: this must be used for ALL and ONLY the text copied from the flex
    1458             :  * input data.  If you pass it something that is not part of the yytext
    1459             :  * string, you are making a mistake.  Internally generated text can be
    1460             :  * appended directly to state->output_buf.
    1461             :  */
    1462             : void
    1463             : psqlscan_emit(PsqlScanState state, const char *txt, int len)
    1464     7612168 : {
    1465             :     PQExpBuffer output_buf = state->output_buf;
    1466     7612168 : 
    1467             :     if (state->safe_encoding)
    1468     7612168 :         appendBinaryPQExpBuffer(output_buf, txt, len);
    1469     7612168 :     else
    1470             :     {
    1471             :         /* Gotta do it the hard way */
    1472             :         const char *reference = state->refline;
    1473           0 :         int         i;
    1474             : 
    1475             :         reference += (txt - state->curline);
    1476           0 : 
    1477             :         for (i = 0; i < len; i++)
    1478           0 :         {
    1479             :             char        ch = txt[i];
    1480           0 : 
    1481             :             if (ch == (char) 0xFF)
    1482           0 :                 ch = reference[i];
    1483           0 :             appendPQExpBufferChar(output_buf, ch);
    1484           0 :         }
    1485             :     }
    1486             : }
    1487     7612168 : 
    1488             : /*
    1489             :  * psqlscan_extract_substring --- fetch value of (part of) the current token
    1490             :  *
    1491             :  * This is like psqlscan_emit(), except that the data is returned as a
    1492             :  * malloc'd string rather than being pushed directly to state->output_buf.
    1493             :  */
    1494             : char *
    1495             : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
    1496        4952 : {
    1497             :     char       *result = (char *) pg_malloc(len + 1);
    1498        4952 : 
    1499             :     if (state->safe_encoding)
    1500        4952 :         memcpy(result, txt, len);
    1501        4952 :     else
    1502             :     {
    1503             :         /* Gotta do it the hard way */
    1504             :         const char *reference = state->refline;
    1505           0 :         int         i;
    1506             : 
    1507             :         reference += (txt - state->curline);
    1508           0 : 
    1509             :         for (i = 0; i < len; i++)
    1510           0 :         {
    1511             :             char        ch = txt[i];
    1512           0 : 
    1513             :             if (ch == (char) 0xFF)
    1514           0 :                 ch = reference[i];
    1515           0 :             result[i] = ch;
    1516           0 :         }
    1517             :     }
    1518             :     result[len] = '\0';
    1519        4952 :     return result;
    1520        4952 : }
    1521             : 
    1522             : /*
    1523             :  * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
    1524             :  *
    1525             :  * If the variable name is found, escape its value using the appropriate
    1526             :  * quoting method and emit the value to output_buf.  (Since the result is
    1527             :  * surely quoted, there is never any reason to rescan it.)  If we don't
    1528             :  * find the variable or escaping fails, emit the token as-is.
    1529             :  */
    1530             : void
    1531             : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
    1532         918 :                          PsqlScanQuoteType quote)
    1533             : {
    1534             :     char       *varname;
    1535             :     char       *value;
    1536             : 
    1537             :     /* Variable lookup. */
    1538             :     varname = psqlscan_extract_substring(state, txt + 2, len - 3);
    1539         918 :     if (state->callbacks->get_variable)
    1540         918 :         value = state->callbacks->get_variable(varname, quote,
    1541         918 :                                                state->cb_passthrough);
    1542             :     else
    1543             :         value = NULL;
    1544           0 :     free(varname);
    1545         918 : 
    1546             :     if (value)
    1547         918 :     {
    1548             :         /* Emit the suitably-escaped value */
    1549             :         appendPQExpBufferStr(state->output_buf, value);
    1550         862 :         free(value);
    1551         862 :     }
    1552             :     else
    1553             :     {
    1554             :         /* Emit original token as-is */
    1555             :         psqlscan_emit(state, txt, len);
    1556          56 :     }
    1557             : }
    1558         918 : 
    1559             : void
    1560             : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
    1561          32 : {
    1562             :     char       *varname;
    1563             :     char       *value;
    1564             : 
    1565             :     varname = psqlscan_extract_substring(state, txt + 3, len - 4);
    1566          32 :     if (state->callbacks->get_variable)
    1567          32 :         value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
    1568          32 :                                                state->cb_passthrough);
    1569             :     else
    1570             :         value = NULL;
    1571           0 :     free(varname);
    1572          32 : 
    1573             :     if (value != NULL)
    1574          32 :     {
    1575             :         psqlscan_emit(state, "TRUE", 4);
    1576          14 :         free(value);
    1577          14 :     }
    1578             :     else
    1579             :     {
    1580             :         psqlscan_emit(state, "FALSE", 5);
    1581          18 :     }
    1582             : }

Generated by: LCOV version 1.14