LCOV - code coverage report
Current view: top level - src/fe_utils - psqlscan.l (source / functions) Coverage Total Hit
Test: PostgreSQL 19beta1 Lines: 83.7 % 600 502
Test Date: 2026-06-26 01:16:38 Functions: 100.0 % 22 22
Legend: Lines:     hit not hit

            Line data    Source code
       1              : %top{
       2              : /*-------------------------------------------------------------------------
       3              :  *
       4              :  * psqlscan.l
       5              :  *    lexical scanner for SQL commands
       6              :  *
       7              :  * This lexer used to be part of psql, and that heritage is reflected in
       8              :  * the file name as well as function and typedef names, though it can now
       9              :  * be used by other frontend programs as well.  It's also possible to extend
      10              :  * this lexer with a compatible add-on lexer to handle program-specific
      11              :  * backslash commands.
      12              :  *
      13              :  * This code is mainly concerned with determining where the end of a SQL
      14              :  * statement is: we are looking for semicolons that are not within quotes,
      15              :  * comments, or parentheses.  The most reliable way to handle this is to
      16              :  * borrow the backend's flex lexer rules, lock, stock, and barrel.  The rules
      17              :  * below are (except for a few) the same as the backend's, but their actions
      18              :  * are just ECHO whereas the backend's actions generally do other things.
      19              :  *
      20              :  * XXX The rules in this file must be kept in sync with the backend lexer!!!
      21              :  *
      22              :  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
      23              :  *
      24              :  * See psqlscan_int.h for additional commentary.
      25              :  *
      26              :  *
      27              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      28              :  * Portions Copyright (c) 1994, Regents of the University of California
      29              :  *
      30              :  * IDENTIFICATION
      31              :  *    src/fe_utils/psqlscan.l
      32              :  *
      33              :  *-------------------------------------------------------------------------
      34              :  */
      35              : #include "postgres_fe.h"
      36              : 
      37              : #include "common/logging.h"
      38              : #include "fe_utils/psqlscan.h"
      39              : 
      40              : #include "libpq-fe.h"
      41              : }
      42              : 
      43              : %{
      44              : 
      45              : /* LCOV_EXCL_START */
      46              : 
      47              : #include "fe_utils/psqlscan_int.h"
      48              : 
      49              : /*
      50              :  * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
      51              :  * doesn't presently make use of that argument, so just declare it as int.
      52              :  */
      53              : typedef int YYSTYPE;
      54              : 
      55              : 
      56              : /* Return values from yylex() */
      57              : #define LEXRES_EOL          0   /* end of input */
      58              : #define LEXRES_SEMI         1   /* command-terminating semicolon found */
      59              : #define LEXRES_BACKSLASH    2   /* backslash command start */
      60              : 
      61              : /* ECHO passes the current match (yytext, yyleng) to psqlscan_emit(); it needs yylex()'s cur_state local */
      62              : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
      63              : 
      64              : static void psqlscan_track_identifier(PsqlScanState state,
      65              :                                       const char *identifier);
      66              : 
      67              : %}
      68              : 
      69              : %option reentrant
      70              : %option bison-bridge
      71              : %option 8bit
      72              : %option never-interactive
      73              : %option nodefault
      74              : %option noinput
      75              : %option nounput
      76              : %option noyywrap
      77              : %option warn
      78              : %option prefix="psql_yy"
      79              : 
      80              : /*
      81              :  * Set the type of yyextra; we use it as a pointer back to the containing
      82              :  * PsqlScanState.
      83              :  */
      84              : %option extra-type="PsqlScanState"
      85              : 
      86              : /*
      87              :  * All of the following definitions and rules should exactly match
      88              :  * src/backend/parser/scan.l so far as the flex patterns are concerned.
      89              :  * The rule bodies are just ECHO as opposed to what the backend does,
      90              :  * however.  (But be sure to duplicate code that affects the lexing process,
      91              :  * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
      92              :  * whereas scan.l has a separate one for each exclusive state.
      93              :  */
      94              : 
      95              : /*
      96              :  * OK, here is a short description of lex/flex rules behavior.
      97              :  * The longest pattern which matches an input string is always chosen.
      98              :  * For equal-length patterns, the first occurring in the rules list is chosen.
      99              :  * INITIAL is the starting state, to which all non-conditional rules apply.
     100              :  * Exclusive states change parsing rules while the state is active.  When in
     101              :  * an exclusive state, only those rules defined for that state apply.
     102              :  *
     103              :  * We use exclusive states for quoted strings, extended comments,
     104              :  * and to eliminate parsing troubles for numeric strings.
     105              :  * Exclusive states:
     106              :  *  <xb> bit string literal
     107              :  *  <xc> extended C-style comments
     108              :  *  <xd> delimited identifiers (double-quoted identifiers)
     109              :  *  <xh> hexadecimal byte string
     110              :  *  <xq> standard quoted strings
     111              :  *  <xqs> quote stop (detect continued strings)
     112              :  *  <xe> extended quoted strings (support backslash escape sequences)
     113              :  *  <xdolq> $foo$ quoted strings
     114              :  *  <xui> quoted identifier with Unicode escapes
     115              :  *  <xus> quoted string with Unicode escapes
     116              :  *
     117              :  * Note: we intentionally don't mimic the backend's <xeu> state; we have
     118              :  * no need to distinguish it from <xe> state, and no good way to get out
     119              :  * of it in error cases.  The backend just throws yyerror() in those
     120              :  * cases, but that's not an option here.
     121              :  */
     122              : 
     123              : %x xb
     124              : %x xc
     125              : %x xd
     126              : %x xh
     127              : %x xq
     128              : %x xqs
     129              : %x xe
     130              : %x xdolq
     131              : %x xui
     132              : %x xus
     133              : 
     134              : /*
     135              :  * In order to make the world safe for Windows and Mac clients as well as
     136              :  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
     137              :  * sequence will be seen as two successive newlines, but that doesn't cause
     138              :  * any problems.  Comments that start with -- and extend to the next
     139              :  * newline are treated as equivalent to a single whitespace character.
     140              :  *
     141              :  * NOTE a fine point: if there is no newline following --, we will absorb
     142              :  * everything to the end of the input as a comment.  This is correct.  Older
     143              :  * versions of Postgres failed to recognize -- as a comment if the input
     144              :  * did not end with a newline.
     145              :  *
     146              :  * non_newline_space tracks all space characters except newlines.
     147              :  *
     148              :  * XXX if you change the set of whitespace characters, fix scanner_isspace()
     149              :  * to agree.
     150              :  */
     151              : 
     152              : space               [ \t\n\r\f\v]
     153              : non_newline_space   [ \t\f\v]
     154              : newline             [\n\r]
     155              : non_newline         [^\n\r]
     156              : 
     157              : comment         ("--"{non_newline}*)
     158              : 
     159              : whitespace      ({space}+|{comment})
     160              : 
     161              : /*
     162              :  * SQL requires at least one newline in the whitespace separating
     163              :  * string literals that are to be concatenated.  Silly, but who are we
     164              :  * to argue?  Note that {whitespace_with_newline} should not have * after
     165              :  * it, whereas {whitespace} should generally have a * after it...
     166              :  */
     167              : 
     168              : special_whitespace      ({space}+|{comment}{newline})
     169              : non_newline_whitespace  ({non_newline_space}|{comment})
     170              : whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
     171              : 
     172              : quote           '
     173              : /* If we see {quote} then {quotecontinue}, the quoted string continues */
     174              : quotecontinue   {whitespace_with_newline}{quote}
     175              : 
     176              : /*
     177              :  * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
     178              :  * {quotecontinue}.  It might seem that this could just be {whitespace}*,
     179              :  * but if there's a dash after {whitespace_with_newline}, it must be consumed
     180              :  * to see if there's another dash --- which would start a {comment} and thus
     181              :  * allow continuation of the {quotecontinue} token.
     182              :  */
     183              : quotecontinuefail   {whitespace}*"-"?
     184              : 
     185              : /* Bit string
     186              :  * It is tempting to scan the string for only those characters
     187              :  * which are allowed. However, this leads to silently swallowed
     188              :  * characters if illegal characters are included in the string.
     189              :  * For example, if xbinside is [01] then B'ABCD' is interpreted
     190              :  * as a zero-length string, and the ABCD' is lost!
     191              :  * Better to pass the string forward and let the input routines
     192              :  * validate the contents.
     193              :  */
     194              : xbstart         [bB]{quote}
     195              : xbinside        [^']*
     196              : 
     197              : /* Hexadecimal byte string */
     198              : xhstart         [xX]{quote}
     199              : xhinside        [^']*
     200              : 
     201              : /* National character */
     202              : xnstart         [nN]{quote}
     203              : 
     204              : /* Quoted string that allows backslash escapes */
     205              : xestart         [eE]{quote}
     206              : xeinside        [^\\']+
     207              : xeescape        [\\][^0-7]
     208              : xeoctesc        [\\][0-7]{1,3}
     209              : xehexesc        [\\]x[0-9A-Fa-f]{1,2}
     210              : xeunicode       [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
     211              : xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
     212              : 
     213              : /* Extended quote
     214              :  * xqdouble implements embedded quote, ''''
     215              :  */
     216              : xqstart         {quote}
     217              : xqdouble        {quote}{quote}
     218              : xqinside        [^']+
     219              : 
     220              : /* $foo$ style quotes ("dollar quoting")
     221              :  * The quoted string starts with $foo$ where "foo" is an optional string
     222              :  * in the form of an identifier, except that it may not contain "$",
     223              :  * and extends to the first occurrence of an identical string.
     224              :  * There is *no* processing of the quoted text.
     225              :  *
     226              :  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
     227              :  * fails to match its trailing "$".
     228              :  */
     229              : dolq_start      [A-Za-z\200-\377_]
     230              : dolq_cont       [A-Za-z\200-\377_0-9]
     231              : dolqdelim       \$({dolq_start}{dolq_cont}*)?\$
     232              : dolqfailed      \${dolq_start}{dolq_cont}*
     233              : dolqinside      [^$]+
     234              : 
     235              : /* Double quote
     236              :  * Allows embedded spaces and other special characters into identifiers.
     237              :  */
     238              : dquote          \"
     239              : xdstart         {dquote}
     240              : xdstop          {dquote}
     241              : xddouble        {dquote}{dquote}
     242              : xdinside        [^"]+
     243              : 
     244              : /* Quoted identifier with Unicode escapes */
     245              : xuistart        [uU]&{dquote}
     246              : 
     247              : /* Quoted string with Unicode escapes */
     248              : xusstart        [uU]&{quote}
     249              : 
     250              : /* error rule to avoid backup */
     251              : xufailed        [uU]&
     252              : 
     253              : 
     254              : /* C-style comments
     255              :  *
     256              :  * The "extended comment" syntax closely resembles allowable operator syntax.
     257              :  * The tricky part here is to get lex to recognize a string starting with
     258              :  * slash-star as a comment, when interpreting it as an operator would produce
     259              :  * a longer match --- remember lex will prefer a longer match!  Also, if we
     260              :  * have something like plus-slash-star, lex will think this is a 3-character
     261              :  * operator whereas we want to see it as a + operator and a comment start.
     262              :  * The solution is two-fold:
     263              :  * 1. append {op_chars}* to xcstart so that it matches as much text as
     264              :  *    {operator} would. Then the tie-breaker (first matching rule of same
     265              :  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
     266              :  *    in case it contains a star-slash that should terminate the comment.
     267              :  * 2. In the operator rule, check for slash-star within the operator, and
     268              :  *    if found throw it back with yyless().  This handles the plus-slash-star
     269              :  *    problem.
     270              :  * Dash-dash comments have similar interactions with the operator rule.
     271              :  */
     272              : xcstart         \/\*{op_chars}*
     273              : xcstop          \*+\/
     274              : xcinside        [^*/]+
     275              : 
     276              : ident_start     [A-Za-z\200-\377_]
     277              : ident_cont      [A-Za-z\200-\377_0-9\$]
     278              : 
     279              : identifier      {ident_start}{ident_cont}*
     280              : 
     281              : /* Assorted special-case operators and operator-like tokens */
     282              : typecast        "::"
     283              : dot_dot         \.\.
     284              : colon_equals    ":="
     285              : 
     286              : /*
     287              :  * These operator-like tokens (unlike the above ones) also match the {operator}
     288              :  * rule, which means that they might be overridden by a longer match if they
     289              :  * are followed by a comment start or a + or - character. Accordingly, if you
     290              :  * add to this list, you must also add corresponding code to the {operator}
     291              :  * block to return the correct token in such cases. (This is not needed in
     292              :  * psqlscan.l since the token value is ignored there.)
     293              :  */
     294              : equals_greater  "=>"
     295              : less_equals     "<="
     296              : greater_equals  ">="
     297              : less_greater    "<>"
     298              : not_equals      "!="
     299              : /* Note there is no need for left_arrow, since "<-" is not a single operator. */
     300              : right_arrow     "->"
     301              : 
     302              : /*
     303              :  * "self" is the set of chars that should be returned as single-character
     304              :  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
     305              :  * which can be one or more characters long (but if a single-char token
     306              :  * appears in the "self" set, it is not to be returned as an Op).  Note
     307              :  * that the sets overlap, but each has some chars that are not in the other.
     308              :  *
     309              :  * If you change either set, adjust the character lists appearing in the
     310              :  * rule for "operator"!
     311              :  */
     312              : self            [,()\[\].;\:\|\+\-\*\/\%\^\<\>\=]
     313              : op_chars        [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
     314              : operator        {op_chars}+
     315              : 
     316              : /*
     317              :  * Numbers
     318              :  *
     319              :  * Unary minus is not part of a number here.  Instead we pass it separately to
     320              :  * the parser, and there it gets coerced via doNegate().
     321              :  *
     322              :  * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
     323              :  *
     324              :  * {realfail} is added to prevent the need for scanner
     325              :  * backup when the {real} rule fails to match completely.
     326              :  */
     327              : decdigit        [0-9]
     328              : hexdigit        [0-9A-Fa-f]
     329              : octdigit        [0-7]
     330              : bindigit        [0-1]
     331              : 
     332              : decinteger      {decdigit}(_?{decdigit})*
     333              : hexinteger      0[xX](_?{hexdigit})+
     334              : octinteger      0[oO](_?{octdigit})+
     335              : bininteger      0[bB](_?{bindigit})+
     336              : 
     337              : hexfail         0[xX]_?
     338              : octfail         0[oO]_?
     339              : binfail         0[bB]_?
     340              : 
     341              : numeric         (({decinteger}\.{decinteger}?)|(\.{decinteger}))
     342              : numericfail     {decinteger}\.\.
     343              : 
     344              : real            ({decinteger}|{numeric})[Ee][-+]?{decinteger}
     345              : realfail        ({decinteger}|{numeric})[Ee][-+]
     346              : 
     347              : /* Positional parameters don't accept underscores. */
     348              : param           \${decdigit}+
     349              : 
     350              : /*
     351              :  * An identifier immediately following an integer literal is disallowed because
     352              :  * in some cases it's ambiguous what is meant: for example, 0x1234 could be
     353              :  * either a hexinteger or a decinteger "0" and an identifier "x1234".  We can
     354              :  * detect such problems by seeing if integer_junk matches a longer substring
     355              :  * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
     356              :  * bininteger).  One "junk" pattern is sufficient because
     357              :  * {decinteger}{identifier} will match all the same strings we'd match with
     358              :  * {hexinteger}{identifier} etc.
     359              :  *
     360              :  * Note that the rule for integer_junk must appear after the ones for
     361              :  * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
     362              :  * and integer_junk, and we need hexinteger to be chosen in that case.
     363              :  *
     364              :  * Also disallow strings matched by numeric_junk, real_junk and param_junk
     365              :  * for consistency.
     366              :  */
     367              : integer_junk    {decinteger}{identifier}
     368              : numeric_junk    {numeric}{identifier}
     369              : real_junk       {real}{identifier}
     370              : param_junk      \${decdigit}+{identifier}
     371              : 
     372              : /* psql-specific: characters allowed in variable names */
     373              : variable_char   [A-Za-z\200-\377_0-9]
     374              : 
     375              : other           .
     376              : 
     377              : /*
     378              :  * Dollar quoted strings are totally opaque, and no escaping is done on them.
     379              :  * Other quoted strings must allow some special characters such as single-quote
     380              :  *  and newline.
     381              :  * Embedded single-quotes are implemented both in the SQL standard
     382              :  *  style of two adjacent single quotes "''" and in the Postgres/Java style
     383              :  *  of escaped-quote "\'".
     384              :  * Other embedded escaped characters are matched explicitly; the backend
     385              :  *  drops the leading backslash, whereas this lexer just ECHOes the match.
     386              :  * Note that xcstart must appear before operator, as explained above!
     387              :  *  Also whitespace (comment) must appear before operator.
     388              :  */
     389              : 
     390              : %%
     391              : 
     392              : %{
     393              :         /* Declare some local variables inside yylex(), for convenience */
     394              :         PsqlScanState cur_state = yyextra;  /* yyextra's type is set by %option extra-type */
     395       799255 :         PQExpBuffer output_buf = cur_state->output_buf; /* tested by the {whitespace} rule */
     396       799255 : 
     397              :         /*
     398              :          * Force flex into the state indicated by start_state.  This has a
     399              :          * couple of purposes: it lets some of the functions below set a new
     400              :          * starting state without ugly direct access to flex variables, and it
     401              :          * allows us to transition from one flex lexer to another so that we
     402              :          * can lex different parts of the source string using separate lexers.
     403              :          */
     404              :         BEGIN(cur_state->start_state);
     405       799255 : %}
     406              : 
     407              : {whitespace}    {
     408              :                     /*
     409              :                      * Note that the whitespace rule includes both true
     410              :                      * whitespace and single-line ("--" style) comments.
     411              :                      * We suppress whitespace until we have collected some
     412              :                      * non-whitespace data.  (This interacts with some
     413              :                      * decisions in MainLoop(); see there for details.)
     414              :                      */
     415              :                     if (output_buf->len > 0)    /* i.e. some data has already been collected */
     416      1900577 :                         ECHO;
     417      1787637 :                 }
     418              : 
     419      1900577 : {xcstart}       {
     420          448 :                     cur_state->xcdepth = 0;     /* outermost comment: no nested levels yet */
     421          448 :                     BEGIN(xc);
     422          448 :                     /* Put back any characters past slash-star; see above */
     423              :                     yyless(2);
     424          448 :                     ECHO;                       /* emits just the two-character slash-star */
     425          448 :                 }
     426              : 
     427          448 : <xc>{
     428              : {xcstart}       {
     429           12 :                     cur_state->xcdepth++;       /* a nested comment opens */
     430           12 :                     /* Put back any characters past slash-star; see above */
     431              :                     yyless(2);
     432           12 :                     ECHO;
     433           12 :                 }
     434              : 
     435           12 : {xcstop}        {
     436          460 :                     if (cur_state->xcdepth <= 0)
     437          460 :                         BEGIN(INITIAL);         /* outermost comment closed */
     438          448 :                     else
     439              :                         cur_state->xcdepth--;   /* closed one nested level; still in <xc> */
     440           12 :                     ECHO;
     441          460 :                 }
     442              : 
     443          460 : {xcinside}      {
     444         1029 :                     ECHO;                       /* run of text containing neither '*' nor '/' */
     445         1029 :                 }
     446              : 
     447         1029 : {op_chars}      {
     448          277 :                     ECHO;                       /* one operator char not matched by a longer rule above */
     449          277 :                 }
     450              : 
     451          277 : \*+             {
     452            0 :                     ECHO;                       /* run of stars not followed by a slash */
     453            0 :                 }
     454              : } /* <xc> */
     455            0 : 
     456              : {xbstart}       {
     457          500 :                     BEGIN(xb);                  /* b'...' : scan the body in <xb> */
     458          500 :                     ECHO;
     459          500 :                 }
     460              : <xh>{xhinside}    |
     461          500 : <xb>{xbinside}    {
     462         2695 :                     ECHO;                       /* shared action: any run of non-quote characters */
     463         2695 :                 }
     464              : 
     465         2695 : {xhstart}       {
     466         2215 :                     /* Hexadecimal byte string.
     467              :                      * The remark about placing a leading "x" on the string
     468              :                      * to mark it for the input routine as a hex string
     469              :                      * appears to describe the backend lexer's action for
     470              :                      * this pattern; here we only switch to <xh> and ECHO.
     471              :                      */
     472              :                     BEGIN(xh);
     473         2215 :                     ECHO;
     474         2215 :                 }
     475              : 
     476         2215 : {xnstart}       {
     477            0 :                     yyless(1);  /* eat only 'n' this time */
     478            0 :                     ECHO;       /* the quote was put back and is scanned as the next token */
     479            0 :                 }
     480              : 
     481            0 : {xqstart}       {
     482       158798 :                     if (cur_state->std_strings)
     483       158798 :                         BEGIN(xq);              /* <xq> has no backslash-escape rules */
     484       158798 :                     else
     485              :                         BEGIN(xe);              /* otherwise scan with the <xe> escape rules */
     486            0 :                     ECHO;
     487       158798 :                 }
     488              : {xestart}       {
     489       158798 :                     BEGIN(xe);                  /* e'...' always uses the <xe> escape rules */
     490          877 :                     ECHO;
     491          877 :                 }
     492              : {xusstart}      {
     493          877 :                     BEGIN(xus);                 /* u&'...' string with Unicode escapes */
     494          464 :                     ECHO;
     495          464 :                 }
     496              : 
     497          464 : <xb,xh,xq,xe,xus>{quote} {
     498       162854 :                     /*
     499              :                      * When we are scanning a quoted string and see an end
     500              :                      * quote, we must look ahead for a possible continuation.
     501              :                      * If we don't see one, we know the end quote was in fact
     502              :                      * the end of the string.  To reduce the lexer table size,
     503              :                      * we use a single "xqs" state to do the lookahead for all
     504              :                      * types of strings.
     505              :                      */
     506              :                     cur_state->state_before_str_stop = YYSTATE; /* <xqs> resumes this on continuation */
     507       162854 :                     BEGIN(xqs);
     508       162854 :                     ECHO;
     509       162854 :                 }
     510              : <xqs>{quotecontinue} {
     511       162854 :                     /*
     512            0 :                      * Found a quote continuation, so return to the in-quote
     513              :                      * state and continue scanning the literal.  The matched
     514              :                      * whitespace and reopening quote are simply ECHOed here.
     515              :                      */
     516              :                     BEGIN(cur_state->state_before_str_stop);
     517            0 :                     ECHO;
     518            0 :                 }
     519              : <xqs>{quotecontinuefail} |
     520            0 : <xqs>{other}  {
     521       162027 :                     /*
     522              :                      * Failed to see a quote continuation.  Throw back
     523              :                      * everything after the end quote, and return to the
     524              :                      * INITIAL state so that text is rescanned normally.
     525              :                      */
     526              :                     yyless(0);                  /* give back the entire lookahead match */
     527       162027 :                     BEGIN(INITIAL);
     528       162027 :                     /* There's nothing to echo ... */
     529              :                 }
     530              : 
     531       162027 : <xq,xe,xus>{xqdouble} {
     532         4127 :                     ECHO;                       /* two adjacent quotes: embedded quote, string continues */
     533         4127 :                 }
     534              : <xq,xus>{xqinside}  {
     535         4127 :                     ECHO;                       /* run of non-quote characters */
     536       166581 :                 }
     537              : <xe>{xeinside}  {
     538       166581 :                     ECHO;                       /* run of characters that are neither backslash nor quote */
     539         1663 :                 }
     540              : <xe>{xeunicode} {
     541         1663 :                     ECHO;                       /* backslash-u + 4 hex digits, or backslash-U + 8 */
     542          132 :                 }
     543              : <xe>{xeunicodefail}   {
     544          132 :                     ECHO;                       /* backslash-u/U followed by too few hex digits */
     545            8 :                 }
     546              : <xe>{xeescape}  {
     547            8 :                     ECHO;                       /* backslash + one character other than 0-7 */
     548          962 :                 }
     549              : <xe>{xeoctesc}  {
     550          962 :                     ECHO;                       /* backslash + 1 to 3 octal digits */
     551           14 :                 }
     552              : <xe>{xehexesc}  {
     553           14 :                     ECHO;                       /* backslash-x + 1 or 2 hex digits */
     554            6 :                 }
     555              : <xe>.         {
     556            6 :                     /* This is only needed for \ just before EOF */
     557            0 :                     ECHO;
     558            0 :                 }
     559              : 
     560            0 : {dolqdelim}     {
     561         4584 :                     cur_state->dolqstart = pg_strdup(yytext);  /* remember the opening delimiter; compared and freed in the xdolq rule below */
     562         4584 :                     BEGIN(xdolq);
     563         4584 :                     ECHO;
     564         4584 :                 }
     565              : {dolqfailed}    {
     566         4584 :                     /* throw back all but the initial "$" */
     567            0 :                     yyless(1);
     568            0 :                     ECHO;
     569            0 :                 }
     570              : <xdolq>{dolqdelim} {
     571            0 :                     if (strcmp(yytext, cur_state->dolqstart) == 0)
     572         4800 :                     {
     573              :                         free(cur_state->dolqstart);
     574         4584 :                         cur_state->dolqstart = NULL;
     575         4584 :                         BEGIN(INITIAL);  /* delimiter equals the opener: leave the dollar-quote state */
     576         4584 :                     }
     577              :                     else
     578              :                     {
     579              :                         /*
     580              :                          * When we fail to match $...$ to dolqstart, transfer
     581              :                          * the $... part to the output, but put back the final
     582              :                          * $ for rescanning.  Consider $delim$...$junk$delim$
     583              :                          */
     584              :                         yyless(yyleng - 1);
     585          216 :                     }
     586              :                     ECHO;  /* echoes whatever part of the match was kept */
     587         4800 :                 }
     588              : <xdolq>{dolqinside} {
     589         4800 :                     ECHO;
     590        24257 :                 }
     591              : <xdolq>{dolqfailed} {
     592        24257 :                     ECHO;
     593          574 :                 }
     594              : <xdolq>.      {
     595          574 :                     /* This is only needed for $ inside the quoted text */
     596         1629 :                     ECHO;
     597         1629 :                 }
     598              : 
     599         1629 : {xdstart}       {
     600         6633 :                     BEGIN(xd);  /* NOTE(review): xd is presumably the double-quoted-identifier state; confirm against the state declarations */
     601         6633 :                     ECHO;
     602         6633 :                 }
     603              : {xuistart}      {
     604         6633 :                     BEGIN(xui);  /* NOTE(review): xui is presumably the Unicode-escaped quoted-identifier state; confirm likewise */
     605           16 :                     ECHO;
     606           16 :                 }
     607              : <xd>{xdstop}  {
     608           16 :                     BEGIN(INITIAL);
     609         6633 :                     ECHO;
     610         6633 :                 }
     611              : <xui>{dquote} {
     612         6633 :                     BEGIN(INITIAL);
     613           16 :                     ECHO;
     614           16 :                 }
     615              : <xd,xui>{xddouble}    {
     616           16 :                     ECHO;
     617           67 :                 }
     618              : <xd,xui>{xdinside}    {
     619           67 :                     ECHO;
     620         6710 :                 }
     621              : 
     622         6710 : {xufailed}  {
     623            0 :                     /* throw back all but the initial u/U; the rest is rescanned */
     624              :                     yyless(1);
     625            0 :                     ECHO;
     626            0 :                 }
     627              : 
     628            0 : {typecast}      {
     629        37151 :                     ECHO;  /* this and the following fixed multi-character tokens are only echoed */
     630        37151 :                 }
     631              : 
     632        37151 : {dot_dot}       {
     633            0 :                     ECHO;
     634            0 :                 }
     635              : 
     636            0 : {colon_equals}  {
     637         1673 :                     ECHO;
     638         1673 :                 }
     639              : 
     640         1673 : {equals_greater} {
     641         1345 :                     ECHO;
     642         1345 :                 }
     643              : 
     644         1345 : {less_equals}   {
     645         1413 :                     ECHO;
     646         1413 :                 }
     647              : 
     648         1413 : {greater_equals} {
     649         4195 :                     ECHO;
     650         4195 :                 }
     651              : 
     652         4195 : {less_greater}  {
     653          918 :                     ECHO;
     654          918 :                 }
     655              : 
     656          918 : {not_equals}    {
     657         1525 :                     ECHO;
     658         1525 :                 }
     659              : 
     660         1525 : {right_arrow}   {
     661          781 :                     ECHO;
     662          781 :                 }
     663              : 
     664          781 :     /*
     665              :      * These rules are specific to psql --- they implement parenthesis
     666              :      * counting and detection of command-ending semicolon.  These must
     667              :      * appear before the {self} rule so that they take precedence over it.
     668              :      */
     669              : 
     670       257294 : "("               {
     671              :                     cur_state->paren_depth++;
     672       257294 :                     ECHO;
     673       257294 :                 }
     674              : 
     675       257294 : ")"               {
     676       257285 :                     if (cur_state->paren_depth > 0)  /* an unmatched ')' never drives the depth negative */
     677       257285 :                         cur_state->paren_depth--;
     678       257285 :                     ECHO;
     679       257285 :                 }
     680              : 
     681       257285 : ";"               {
     682       245292 :                     ECHO;  /* the semicolon is echoed whether or not it ends the command */
     683       245292 :                     if (cur_state->paren_depth == 0 &&
     684       245292 :                         cur_state->begin_depth == 0)
     685       245256 :                     {
     686              :                         /* Terminate lexing temporarily, after saving the current start condition */
     687              :                         cur_state->start_state = YY_START;
     688       245129 :                         cur_state->init_idents_count = 0;  /* makes psqlscan_track_identifier reset its state for the next statement */
     689       245129 :                         return LEXRES_SEMI;
     690       245129 :                     }
     691              :                 }
     692              : 
     693          163 :     /*
     694              :      * psql-specific rules to handle backslash commands and variable
     695              :      * substitution.  We want these before {self}, also.
     696              :      */
     697              : 
     698          512 : "\\"[;:]      {
     699              :                     /* Force a semi-colon or colon into the query buffer: only the char after the backslash is emitted, and lexing continues */
     700              :                     psqlscan_emit(cur_state, yytext + 1, 1);
     701          512 :                     /* Reset BEGIN/END tracking if semi at outer level */
     702              :                     if (yytext[1] == ';' &&
     703          512 :                         cur_state->paren_depth == 0 &&
     704          512 :                         cur_state->begin_depth == 0)
     705          512 :                         cur_state->init_idents_count = 0;
     706          512 :                 }
     707              : 
     708          512 : "\\"          {
     709        32810 :                     /* Terminate lexing temporarily, after saving the current start condition */
     710              :                     cur_state->start_state = YY_START;
     711        32810 :                     return LEXRES_BACKSLASH;
     712        32810 :                 }
     713              : 
     714              : :{variable_char}+   {
     715         1787 :                     /* Possible psql variable substitution; yytext is the colon followed by the name */
     716              :                     char       *varname;
     717              :                     char       *value;
     718              : 
     719              :                     varname = psqlscan_extract_substring(cur_state,
     720         1787 :                                                          yytext + 1,
     721         1787 :                                                          yyleng - 1);  /* skip the leading colon */
     722         1787 :                     if (cur_state->callbacks->get_variable)
     723         1787 :                         value = cur_state->callbacks->get_variable(varname,
     724         1191 :                                                                    PQUOTE_PLAIN,
     725              :                                                                    cur_state->cb_passthrough);
     726              :                     else
     727              :                         value = NULL;  /* no lookup callback supplied: handled like an unknown variable */
     728          596 : 
     729              :                     if (value)
     730         1787 :                     {
     731              :                         /* It is a variable, check for recursion */
     732              :                         if (psqlscan_var_is_current_source(cur_state, varname))
     733          883 :                         {
     734              :                             /* Recursive expansion --- don't go there */
     735              :                             pg_log_warning("skipping recursive expansion of variable \"%s\"",
     736            0 :                                                               varname);
     737              :                             /* Instead copy the string as is */
     738              :                             ECHO;
     739            0 :                         }
     740              :                         else
     741              :                         {
     742              :                             /* OK, perform substitution: the value becomes a new input buffer to be lexed */
     743              :                             psqlscan_push_new_buffer(cur_state, value, varname);
     744          883 :                             /* yy_scan_string already made buffer active */
     745              :                         }
     746              :                         free(value);  /* value is freed here on both paths above */
     747          883 :                     }
     748              :                     else
     749              :                     {
     750              :                         /*
     751              :                          * if the variable doesn't exist we'll copy the string
     752              :                          * as is
     753              :                          */
     754              :                         ECHO;
     755          904 :                     }
     756              : 
     757              :                     free(varname);
     758         1787 :                 }
     759              : 
     760         1787 : :'{variable_char}+' {
     761          656 :                     psqlscan_escape_variable(cur_state, yytext, yyleng,  /* whole match, colon and quotes included, is handed over */
     762          656 :                                              PQUOTE_SQL_LITERAL);  /* single-quoted form requests SQL-literal quoting */
     763              :                 }
     764              : 
     765          656 : :\"{variable_char}+\" {
     766           21 :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     767           21 :                                              PQUOTE_SQL_IDENT);
     768              :                 }
     769              : 
     770           21 : :\{\?{variable_char}+\} {
     771            8 :                     psqlscan_test_variable(cur_state, yytext, yyleng);
     772            8 :                 }
     773              : 
     774            8 :     /*
     775              :      * These rules just avoid the need for scanner backup if one of the
     776              :      * three rules above fails to match completely.  Each keeps only the colon (yyless(1)), echoes it, and leaves the rest to be rescanned.
     777              :      */
     778              : 
     779            0 : :'{variable_char}*  {
     780              :                     /* Throw back everything but the colon */
     781              :                     yyless(1);
     782            0 :                     ECHO;
     783            0 :                 }
     784              : 
     785            0 : :\"{variable_char}*    {
     786            0 :                     /* Throw back everything but the colon */
     787              :                     yyless(1);
     788            0 :                     ECHO;
     789            0 :                 }
     790              : 
     791            0 : :\{\?{variable_char}*   {
     792            0 :                     /* Throw back everything but the colon */
     793              :                     yyless(1);
     794            0 :                     ECHO;
     795            0 :                 }
     796              : :\{ {
     797            0 :                     /* Throw back everything but the colon */
     798            0 :                     yyless(1);
     799            0 :                     ECHO;
     800            0 :                 }
     801              : 
     802            0 :     /*
     803              :      * Back to backend-compatible rules.  (The psql-specific rules above come first so that they take precedence over {self}.)
     804              :      */
     805              : 
     806       447764 : {self}          {
     807              :                     ECHO;
     808       447764 :                 }
     809              : 
     810       447764 : {operator}      {
     811        12754 :                     /*
     812              :                      * Check for embedded slash-star or dash-dash; those
     813              :                      * are comment starts, so operator must stop there.
     814              :                      * Note that slash-star or dash-dash at the first
     815              :                      * character will match a prior rule, not this one.
     816              :                      */
     817              :                     int         nchars = yyleng;  /* number of leading chars finally kept as the operator */
     818        12754 :                     char       *slashstar = strstr(yytext, "/*");
     819        12754 :                     char       *dashdash = strstr(yytext, "--");
     820        12754 : 
     821              :                     if (slashstar && dashdash)
     822        12754 :                     {
     823              :                         /* if both appear, take the first one; from here on slashstar means "earliest comment start" */
     824              :                         if (slashstar > dashdash)
     825            0 :                             slashstar = dashdash;
     826            0 :                     }
     827              :                     else if (!slashstar)
     828        12754 :                         slashstar = dashdash;
     829        12714 :                     if (slashstar)
     830        12754 :                         nchars = slashstar - yytext;
     831           48 : 
     832              :                     /*
     833              :                      * For SQL compatibility, '+' and '-' cannot be the
     834              :                      * last char of a multi-char operator unless the operator
     835              :                      * contains chars that are not in SQL operators.
     836              :                      * The idea is to lex '=-' as two operators, but not
     837              :                      * to forbid operator names like '?-' that could not be
     838              :                      * sequences of SQL operators.
     839              :                      */
     840              :                     if (nchars > 1 &&
     841        12754 :                         (yytext[nchars - 1] == '+' ||
     842        11724 :                          yytext[nchars - 1] == '-'))
     843        11720 :                     {
     844              :                         int         ic;
     845              : 
     846              :                         for (ic = nchars - 2; ic >= 0; ic--)  /* scan leftward; ic ends at -1 if no qualifying char exists */
     847          385 :                         {
     848              :                             char c = yytext[ic];
     849          326 :                             if (c == '~' || c == '!' || c == '@' ||
     850          326 :                                 c == '#' || c == '^' || c == '&' ||
     851          270 :                                 c == '|' || c == '`' || c == '?' ||
     852          106 :                                 c == '%')
     853              :                                 break;
     854              :                         }
     855              :                         if (ic < 0)
     856          291 :                         {
     857              :                             /*
     858              :                              * didn't find a qualifying character, so remove
     859              :                              * all trailing [+-] (but always keep at least one char)
     860              :                              */
     861              :                             do {
     862              :                                 nchars--;
     863           59 :                             } while (nchars > 1 &&
     864           59 :                                  (yytext[nchars - 1] == '+' ||
     865           23 :                                   yytext[nchars - 1] == '-'));
     866           23 :                         }
     867              :                     }
     868              : 
     869              :                     if (nchars < yyleng)
     870        12754 :                     {
     871              :                         /* Strip the unwanted chars from the token; yyless puts them back for rescanning */
     872              :                         yyless(nchars);
     873          107 :                     }
     874              :                     ECHO;
     875        12754 :                 }
     876              : 
     877        12754 : {param}         {
     878          924 :                     ECHO;  /* parameter and numeric tokens, valid or "fail"/"junk" forms alike, are only echoed */
     879          924 :                 }
     880              : {param_junk}    {
     881          924 :                     ECHO;
     882            8 :                 }
     883              : 
     884            8 : {decinteger}    {
     885       142181 :                     ECHO;
     886       142181 :                 }
     887              : {hexinteger}    {
     888       142181 :                     ECHO;
     889           83 :                 }
     890              : {octinteger}    {
     891           83 :                     ECHO;
     892           40 :                 }
     893              : {bininteger}    {
     894           40 :                     ECHO;
     895           40 :                 }
     896              : {hexfail}       {
     897           40 :                     ECHO;
     898            4 :                 }
     899              : {octfail}       {
     900            4 :                     ECHO;
     901            4 :                 }
     902              : {binfail}       {
     903            4 :                     ECHO;
     904            4 :                 }
     905              : {numeric}       {
     906            4 :                     ECHO;
     907         5335 :                 }
     908              : {numericfail}   {
     909         5335 :                     /* throw back the .., and treat as integer (yyleng - 2 keeps all but the last two chars) */
     910            0 :                     yyless(yyleng - 2);
     911            0 :                     ECHO;
     912            0 :                 }
     913              : {real}          {
     914            0 :                     ECHO;
     915          506 :                 }
     916              : {realfail}      {
     917          506 :                     ECHO;
     918            4 :                 }
     919              : {integer_junk}  {
     920            4 :                     ECHO;
     921           44 :                 }
     922              : {numeric_junk}  {
     923           44 :                     ECHO;
     924           32 :                 }
     925              : {real_junk}     {
     926           32 :                     ECHO;
     927            0 :                 }
     928              : 
     929            0 : 
     930      1848207 : {identifier}    {
     931              :                     psqlscan_track_identifier(cur_state, yytext);  /* update initial-keyword and BEGIN/END tracking before echoing */
     932      1848207 :                     ECHO;
     933      1848207 :                 }
     934              : 
     935      1848207 : {other}         {
     936            8 :                     ECHO;
     937            8 :                 }
     938              : 
     939            8 : <<EOF>>         {
     940       522199 :                     if (cur_state->buffer_stack == NULL)  /* empty stack: this is the end of the real input, not of a variable's value */
     941       522199 :                     {
     942              :                         cur_state->start_state = YY_START;
     943       521316 :                         return LEXRES_EOL;      /* end of input reached */
     944       521316 :                     }
     945              : 
     946              :                     /*
     947              :                      * We were expanding a variable, so pop the inclusion
     948              :                      * stack and keep lexing (no return here)
     949              :                      */
     950              :                     psqlscan_pop_buffer_stack(cur_state);
     951          883 :                     psqlscan_select_top_buffer(cur_state);
     952          883 :                 }
     953              : 
     954          883 : %%
     955            0 : 
     956              : /* LCOV_EXCL_STOP */
     957              : 
     958              : /*
     959              :  * Record the first few keywords/identifiers of a statement or CREATE
     960              :  * SCHEMA sub-statement in the idents[] array, of length idents_size.
     961              :  * *idents_count is the number of entries filled so far; it is advanced by one per call until it reaches idents_size, after which calls do nothing.
     962              :  *
     963              :  * We record the interesting keywords using their first character, which
     964              :  * works so long as those are all different.  We could switch to an enum
     965              :  * if that stops being true, but for now this is easy and compact.
     966              :  */
     967              : static void
     968              : psqlscan_record_initial_keyword(const char *identifier,
     969      1397150 :                                 char *idents,
     970              :                                 int idents_size,
     971              :                                 int *idents_count)
     972              : {
     973              :     if (*idents_count < idents_size)  /* once the array is full, later identifiers are ignored */
     974      1397150 :     {
     975              :         /*
     976              :          * What we need to recognize is CREATE [OR REPLACE] FUNCTION/PROCEDURE
     977              :          * and CREATE SCHEMA.  Checking for SCHEMA is useless but not harmful
     978              :          * in the CREATE SCHEMA sub-statement case.
     979              :          */
     980              :         if (pg_strcasecmp(identifier, "create") == 0 ||
     981      1624396 :             pg_strcasecmp(identifier, "function") == 0 ||
     982      1574034 :             pg_strcasecmp(identifier, "procedure") == 0 ||
     983      1566922 :             pg_strcasecmp(identifier, "or") == 0 ||
     984      1565123 :             pg_strcasecmp(identifier, "replace") == 0 ||
     985      1562189 :             pg_strcasecmp(identifier, "schema") == 0)
     986       780358 :             idents[*idents_count] = pg_tolower((unsigned char) identifier[0]);
     987        55631 :         /* For other keywords or identifiers, leave '\0' in the array entry (the slot is still consumed) */
     988              :         (*idents_count)++;
     989       833992 :     }
     990              : }
     991      1397150 : 
     992              : /*
     993              :  * Does the current input match CREATE [OR REPLACE] {FUNCTION|PROCEDURE}?  idents[] holds the first-letter codes stored by psqlscan_record_initial_keyword; entries 0..3 may be read.
     994              :  */
     995              : static bool
     996              : psqlscan_is_create_routine(const char *idents)
     997      1397170 : {
     998              :     return idents[0] == 'c' &&  /* CREATE */
     999      1698496 :         (idents[1] == 'f' || idents[1] == 'p' ||  /* FUNCTION or PROCEDURE directly */
    1000       301326 :          (idents[1] == 'o' && idents[2] == 'r' &&  /* OR REPLACE ... */
    1001       270995 :           (idents[3] == 'f' || idents[3] == 'p')));  /* ... then FUNCTION or PROCEDURE */
    1002        12534 : }
    1003              : 
    1004              : /*
    1005              :  * Track whether we are inside a BEGIN .. END block in a function definition,
    1006              :  * so that semicolons contained therein don't terminate the whole statement.
    1007              :  * Short of writing a full parser here, the following heuristic should work.
    1008              :  *
    1009              :  * We track whether the beginning of the statement matches CREATE [OR REPLACE]
    1010              :  * {FUNCTION|PROCEDURE}.  For CREATE SCHEMA, track BEGIN .. END blocks only
    1011              :  * after recognizing an embedded CREATE [OR REPLACE] {FUNCTION|PROCEDURE}
    1012              :  * subcommand.  Once one of these conditions holds, count BEGIN and END
    1013              :  * pairs.  We also have to account for CASE ... END.  The result is kept in state->begin_depth, which the semicolon rules test.
    1014              :  */
    1015              : static void
    1016              : psqlscan_track_identifier(PsqlScanState state, const char *identifier)
    1017      1848207 : {
    1018              :     bool        is_create_schema;
    1019              : 
    1020              :     /* None of this needs to happen when we're inside parentheses */
    1021              :     if (state->paren_depth != 0)
    1022      1848207 :         return;
    1023       455704 : 
    1024              :     /* Reset all my state at the start of each new statement (the semicolon rules zero init_idents_count) */
    1025              :     if (state->init_idents_count == 0)
    1026      1392503 :     {
    1027              :         memset(state->init_idents, 0, sizeof(state->init_idents));
    1028       252066 :         state->sub_idents_count = 0;
    1029       252066 :         memset(state->sub_idents, 0, sizeof(state->sub_idents));
    1030       252066 :     }
    1031              : 
    1032              :     /* Record initial keywords if init_idents_count is small enough */
    1033              :     psqlscan_record_initial_keyword(identifier,
    1034      1392503 :                                     state->init_idents,
    1035      1392503 :                                     lengthof(state->init_idents),
    1036              :                                     &state->init_idents_count);
    1037              : 
    1038              :     /*
    1039              :      * In CREATE SCHEMA, track identifiers from each top-level CREATE schema
    1040              :      * element separately, so that BEGIN/END tracking is enabled only within
    1041              :      * CREATE [OR REPLACE] {FUNCTION|PROCEDURE} clauses.
    1042              :      */
    1043              :     is_create_schema = (state->init_idents[0] == 'c' &&
    1044      1690684 :                         state->init_idents[1] == 's');  /* first two keywords were CREATE SCHEMA */
    1045       298181 :     if (is_create_schema &&
    1046      1392503 :         state->begin_depth == 0)
    1047         4667 :     {
    1048              :         /* Reset sub-clause state at each top-level CREATE keyword */
    1049              :         if (pg_strcasecmp(identifier, "create") == 0)
    1050         4647 :         {
    1051              :             state->sub_idents_count = 0;
    1052          500 :             memset(state->sub_idents, 0, sizeof(state->sub_idents));
    1053          500 :         }
    1054              :         /* ... and record the first few keywords following that */
    1055              :         psqlscan_record_initial_keyword(identifier,
    1056         4647 :                                         state->sub_idents,
    1057         4647 :                                         lengthof(state->sub_idents),
    1058              :                                         &state->sub_idents_count);
    1059              :     }
    1060              : 
    1061              :     /*
    1062              :      * Track BEGIN/CASE/END only when within an appropriate (sub) statement.
    1063              :      */
    1064              :     if (psqlscan_is_create_routine(state->init_idents) ||
    1065      1392503 :         (is_create_schema &&
    1066         4667 :          psqlscan_is_create_routine(state->sub_idents)))
    1067         4667 :     {
    1068              :         if (pg_strcasecmp(identifier, "begin") == 0)
    1069        38444 :             state->begin_depth++;
    1070          115 :         else if (pg_strcasecmp(identifier, "case") == 0)
    1071        38329 :         {
    1072              :             /*
    1073              :              * CASE also ends with END.  We only need to track this if we are
    1074              :              * already inside a BEGIN.
    1075              :              */
    1076              :             if (state->begin_depth >= 1)
    1077            4 :                 state->begin_depth++;
    1078            4 :         }
    1079              :         else if (pg_strcasecmp(identifier, "end") == 0)
    1080        38325 :         {
    1081              :             if (state->begin_depth > 0)  /* an END with nothing open is ignored, so the depth never goes negative */
    1082          119 :                 state->begin_depth--;
    1083          119 :         }
    1084              :     }
    1085              : }
    1086              : 
    1087              : /*
    1088              :  * Create a lexer working state struct.
    1089              :  *
    1090              :  * callbacks is a struct of function pointers that encapsulate some
    1091              :  * behavior we need from the surrounding program.  This struct must
    1092              :  * remain valid for the lifespan of the PsqlScanState, because only the pointer is stored.
    1093              :  */
    1094              : PsqlScanState
    1095              : psql_scan_create(const PsqlScanCallbacks *callbacks)
    1096        10660 : {
    1097              :     PsqlScanState state;
    1098              : 
    1099              :     state = pg_malloc0_object(PsqlScanStateData);
    1100        10660 : 
    1101              :     state->callbacks = callbacks;
    1102        10660 : 
    1103              :     yylex_init(&state->scanner);  /* create the flex scanner instance owned by this state */
    1104        10660 : 
    1105              :     yyset_extra(state, state->scanner);  /* attach our state to the scanner as its extra data */
    1106        10660 : 
    1107              :     psql_scan_reset(state);
    1108        10660 : 
    1109              :     return state;
    1110        10660 : }
    1111              : 
    1112              : /*
    1113              :  * Destroy a lexer working state struct, releasing all resources.  The struct itself is freed, so the caller must not use state afterwards.
    1114              :  */
    1115              : void
    1116              : psql_scan_destroy(PsqlScanState state)
    1117        10604 : {
    1118              :     psql_scan_finish(state);
    1119        10604 : 
    1120              :     psql_scan_reset(state);
    1121        10604 : 
    1122              :     yylex_destroy(state->scanner);  /* counterpart of the yylex_init call in psql_scan_create */
    1123        10604 : 
    1124              :     free(state);
    1125        10604 : }
    1126        10604 : 
    1127              : /*
    1128              :  * Set the callback passthrough pointer for the lexer.
    1129              :  *
    1130              :  * This could have been integrated into psql_scan_create, but keeping it
    1131              :  * separate allows the application to change the pointer later, which might
    1132              :  * be useful.
    1133              :  */
    1134              : void
    1135              : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
    1136        10061 : {
    1137              :     state->cb_passthrough = passthrough;
    1138        10061 : }
    1139        10061 : 
    1140              : /*
    1141              :  * Set up to perform lexing of the given input line.
    1142              :  *
    1143              :  * The text at *line, extending for line_len bytes, will be scanned by
    1144              :  * subsequent calls to the psql_scan routines.  psql_scan_finish should
    1145              :  * be called when scanning is complete.  Note that the lexer retains
    1146              :  * a pointer to the storage at *line --- this string must not be altered
    1147              :  * or freed until after psql_scan_finish is called.
    1148              :  *
    1149              :  * encoding is the libpq identifier for the character encoding in use,
    1150              :  * and std_strings says whether standard_conforming_strings is on.
    1151              :  */
    1152              : void
    1153              : psql_scan_setup(PsqlScanState state,
    1154       521665 :                 const char *line, int line_len,
    1155              :                 int encoding, bool std_strings)
    1156              : {
    1157              :     /* Mustn't be scanning already */
    1158              :     Assert(state->scanbufhandle == NULL);
    1159              :     Assert(state->buffer_stack == NULL);
    1160              : 
    1161              :     /* Do we need to hack the character set encoding? */
    1162              :     state->encoding = encoding;
    1163       521665 :     state->safe_encoding = pg_valid_server_encoding_id(encoding);
    1164       521665 : 
    1165              :     /* Save standard-strings flag as well */
    1166              :     state->std_strings = std_strings;
    1167       521665 : 
    1168              :     /* Set up flex input buffer with appropriate translation and padding */
    1169              :     state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
    1170       521665 :                                                    &state->scanbuf);
    1171              :     state->scanline = line;
    1172       521665 : 
    1173              :     /* Set lookaside data in case we have to map unsafe encoding */
    1174              :     state->curline = state->scanbuf;
    1175       521665 :     state->refline = state->scanline;
    1176       521665 : 
    1177              :     /* Initialize state for psql_scan_get_location() */
    1178              :     state->cur_line_no = 0;      /* yylex not called yet */
    1179       521665 :     state->cur_line_ptr = state->scanbuf;
    1180       521665 : }
    1181       521665 : 
    1182              : /*
    1183              :  * Do lexical analysis of SQL command text.
    1184              :  *
    1185              :  * The text previously passed to psql_scan_setup is scanned, and appended
    1186              :  * (possibly with transformation) to query_buf.
    1187              :  *
    1188              :  * The return value indicates the condition that stopped scanning:
    1189              :  *
    1190              :  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
    1191              :  * transferred to query_buf.)  The command accumulated in query_buf should
    1192              :  * be executed, then clear query_buf and call again to scan the remainder
    1193              :  * of the line.
    1194              :  *
    1195              :  * PSCAN_BACKSLASH: found a backslash that starts a special command.
    1196              :  * Any previous data on the line has been transferred to query_buf.
    1197              :  * The caller will typically next apply a separate flex lexer to scan
    1198              :  * the special command.
    1199              :  *
    1200              :  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
    1201              :  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
    1202              :  *
    1203              :  * PSCAN_EOL: the end of the line was reached, and there is no lexical
    1204              :  * reason to consider the command incomplete.  The caller may or may not
    1205              :  * choose to send it.  *prompt is set to the appropriate prompt type if
    1206              :  * the caller chooses to collect more input.
    1207              :  *
    1208              :  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
    1209              :  * be called next, then the cycle may be repeated with a fresh input line.
    1210              :  *
    1211              :  * In all cases, *prompt is set to an appropriate prompt type code for the
    1212              :  * next line-input operation.
    1213              :  */
    1214              : PsqlScanResult
    1215              : psql_scan(PsqlScanState state,
    1216       799255 :           PQExpBuffer query_buf,
    1217              :           promptStatus_t *prompt)
    1218              : {
    1219              :     PsqlScanResult result;
    1220              :     int         lexresult;
    1221              : 
    1222              :     /* Must be scanning already */
    1223              :     Assert(state->scanbufhandle != NULL);
    1224              : 
    1225              :     /* Set current output target */
    1226              :     state->output_buf = query_buf;
    1227       799255 : 
    1228              :     /* Set input source */
    1229              :     if (state->buffer_stack != NULL)
    1230       799255 :         yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
    1231           60 :     else
    1232              :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1233       799195 : 
    1234              :     /* And lex. */
    1235              :     lexresult = yylex(NULL, state->scanner);
    1236       799255 : 
    1237              :     /* Notify psql_scan_get_location() that a yylex call has been made. */
    1238              :     if (state->cur_line_no == 0)
    1239       799255 :         state->cur_line_no = 1;
    1240       521663 : 
    1241              :     /*
    1242              :      * Check termination state and return appropriate result info.
    1243              :      */
    1244              :     switch (lexresult)
    1245       799255 :     {
    1246              :         case LEXRES_EOL:        /* end of input */
    1247       521316 :             switch (state->start_state)
    1248       521316 :             {
    1249              :                 case INITIAL:
    1250       489576 :                 case xqs:       /* we treat this like INITIAL */
    1251              :                     if (state->paren_depth > 0)
    1252       489576 :                     {
    1253              :                         result = PSCAN_INCOMPLETE;
    1254        43108 :                         *prompt = PROMPT_PAREN;
    1255        43108 :                     }
    1256              :                     else if (state->begin_depth > 0)
    1257       446468 :                     {
    1258              :                         result = PSCAN_INCOMPLETE;
    1259          673 :                         *prompt = PROMPT_CONTINUE;
    1260          673 :                     }
    1261              :                     else if (query_buf->len > 0)
    1262       445795 :                     {
    1263              :                         result = PSCAN_EOL;
    1264        94522 :                         *prompt = PROMPT_CONTINUE;
    1265        94522 :                     }
    1266              :                     else
    1267              :                     {
    1268              :                         /* never bother to send an empty buffer */
    1269              :                         result = PSCAN_INCOMPLETE;
    1270       351273 :                         *prompt = PROMPT_READY;
    1271       351273 :                     }
    1272              :                     break;
    1273       489576 :                 case xb:
    1274            0 :                     result = PSCAN_INCOMPLETE;
    1275            0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1276            0 :                     break;
    1277            0 :                 case xc:
    1278          513 :                     result = PSCAN_INCOMPLETE;
    1279          513 :                     *prompt = PROMPT_COMMENT;
    1280          513 :                     break;
    1281          513 :                 case xd:
    1282           23 :                     result = PSCAN_INCOMPLETE;
    1283           23 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1284           23 :                     break;
    1285           23 :                 case xh:
    1286            0 :                     result = PSCAN_INCOMPLETE;
    1287            0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1288            0 :                     break;
    1289            0 :                 case xe:
    1290          301 :                     result = PSCAN_INCOMPLETE;
    1291          301 :                     *prompt = PROMPT_SINGLEQUOTE;
    1292          301 :                     break;
    1293          301 :                 case xq:
    1294         7046 :                     result = PSCAN_INCOMPLETE;
    1295         7046 :                     *prompt = PROMPT_SINGLEQUOTE;
    1296         7046 :                     break;
    1297         7046 :                 case xdolq:
    1298        23857 :                     result = PSCAN_INCOMPLETE;
    1299        23857 :                     *prompt = PROMPT_DOLLARQUOTE;
    1300        23857 :                     break;
    1301        23857 :                 case xui:
    1302            0 :                     result = PSCAN_INCOMPLETE;
    1303            0 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1304            0 :                     break;
    1305            0 :                 case xus:
    1306            0 :                     result = PSCAN_INCOMPLETE;
    1307            0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1308            0 :                     break;
    1309            0 :                 default:
    1310            0 :                     /* can't get here */
    1311              :                     fprintf(stderr, "invalid YY_START\n");
    1312            0 :                     exit(1);
    1313            0 :             }
    1314              :             break;
    1315       521316 :         case LEXRES_SEMI:       /* semicolon */
    1316       245129 :             result = PSCAN_SEMICOLON;
    1317       245129 :             *prompt = PROMPT_READY;
    1318       245129 :             break;
    1319       245129 :         case LEXRES_BACKSLASH:  /* backslash */
    1320        32810 :             result = PSCAN_BACKSLASH;
    1321        32810 :             *prompt = PROMPT_READY;
    1322        32810 :             break;
    1323        32810 :         default:
    1324            0 :             /* can't get here */
    1325              :             fprintf(stderr, "invalid yylex result\n");
    1326            0 :             exit(1);
    1327            0 :     }
    1328              : 
    1329              :     return result;
    1330       799255 : }
    1331              : 
    1332              : /*
    1333              :  * Clean up after scanning a string.  This flushes any unread input and
    1334              :  * releases resources (but not the PsqlScanState itself).  Note however
    1335              :  * that this does not reset the lexer scan state; that can be done by
    1336              :  * psql_scan_reset(), which is an orthogonal operation.
    1337              :  *
    1338              :  * It is legal to call this when not scanning anything (makes it easier
    1339              :  * to deal with error recovery).
    1340              :  */
    1341              : void
    1342              : psql_scan_finish(PsqlScanState state)
    1343       531898 : {
    1344              :     /* Drop any incomplete variable expansions. */
    1345              :     while (state->buffer_stack != NULL)
    1346       531898 :         psqlscan_pop_buffer_stack(state);
    1347            0 : 
    1348              :     /* Done with the outer scan buffer, too */
    1349              :     if (state->scanbufhandle)
    1350       531898 :         yy_delete_buffer(state->scanbufhandle, state->scanner);
    1351       521610 :     state->scanbufhandle = NULL;
    1352       531898 :     if (state->scanbuf)
    1353       531898 :         free(state->scanbuf);
    1354       521610 :     state->scanbuf = NULL;
    1355       531898 : }
    1356       531898 : 
    1357              : /*
    1358              :  * Reset lexer scanning state to start conditions.  This is appropriate
    1359              :  * for executing \r psql commands (or any other time that we discard the
    1360              :  * prior contents of query_buf).  It is not, however, necessary to do this
    1361              :  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
    1362              :  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
    1363              :  * conditions are returned.
    1364              :  *
    1365              :  * Note that this is unrelated to flushing unread input; that task is
    1366              :  * done by psql_scan_finish().
    1367              :  */
    1368              : void
    1369              : psql_scan_reset(PsqlScanState state)
    1370        23577 : {
    1371              :     state->start_state = INITIAL;
    1372        23577 :     state->paren_depth = 0;
    1373        23577 :     state->xcdepth = 0;          /* not really necessary */
    1374        23577 :     if (state->dolqstart)
    1375        23577 :         free(state->dolqstart);
    1376            0 :     state->dolqstart = NULL;
    1377        23577 :     state->begin_depth = 0;
    1378        23577 :     state->init_idents_count = 0;
    1379        23577 : }
    1380        23577 : 
    1381              : /*
    1382              :  * Reselect this lexer (psqlscan.l) after using another one.
    1383              :  *
    1384              :  * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
    1385              :  * state, because we'd never switch to another lexer in a different state.
    1386              :  * However, we don't want to reset e.g. paren_depth, so this can't be
    1387              :  * the same as psql_scan_reset().
    1388              :  *
    1389              :  * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
    1390              :  * must be a superset of this.
    1391              :  *
    1392              :  * Note: it seems likely that other lexers could just assign INITIAL for
    1393              :  * themselves, since that probably has the value zero in every flex-generated
    1394              :  * lexer.  But let's not assume that.
    1395              :  */
    1396              : void
    1397              : psql_scan_reselect_sql_lexer(PsqlScanState state)
    1398       155564 : {
    1399              :     state->start_state = INITIAL;
    1400       155564 : }
    1401       155564 : 
    1402              : /*
    1403              :  * Return true if lexer is currently in an "inside quotes" state.
    1404              :  *
    1405              :  * This is pretty grotty but is needed to preserve the old behavior
    1406              :  * that mainloop.c drops blank lines not inside quotes without even
    1407              :  * echoing them.
    1408              :  */
    1409              : bool
    1410              : psql_scan_in_quote(PsqlScanState state)
    1411       100382 : {
    1412              :     return state->start_state != INITIAL &&
    1413       101000 :         state->start_state != xqs;
    1414          618 : }
    1415              : 
    1416              : /*
    1417              :  * Return the current scanning location (end+1 of last scanned token),
    1418              :  * as a line number counted from 1 and an offset from string start.
    1419              :  *
    1420              :  * This considers only the outermost input string, and therefore is of
    1421              :  * limited use for programs that use psqlscan_push_new_buffer().
    1422              :  *
    1423              :  * It would be a bit easier probably to use "%option yylineno" to count
    1424              :  * lines, but the flex manual says that has a performance cost, and only
    1425              :  * a minority of programs using psqlscan have need for this functionality.
    1426              :  * So we implement it ourselves without adding overhead to the lexer itself.
    1427              :  */
    1428              : void
    1429              : psql_scan_get_location(PsqlScanState state,
    1430         1737 :                        int *lineno, int *offset)
    1431              : {
    1432              :     const char *line_end;
    1433              : 
    1434              :     /*
    1435              :      * We rely on flex's having stored a NUL after the current token in
    1436              :      * scanbuf.  Therefore we must specially handle the state before yylex()
    1437              :      * has been called, when obviously that won't have happened yet.
    1438              :      */
    1439              :     if (state->cur_line_no == 0)
    1440         1737 :     {
    1441              :         *lineno = 1;
    1442            0 :         *offset = 0;
    1443            0 :         return;
    1444            0 :     }
    1445              : 
    1446              :     /*
    1447              :      * Advance cur_line_no/cur_line_ptr past whatever has been lexed so far.
    1448              :      * Doing this prevents repeated calls from being O(N^2) for long inputs.
    1449              :      */
    1450              :     while ((line_end = strchr(state->cur_line_ptr, '\n')) != NULL)
    1451         2210 :     {
    1452              :         state->cur_line_no++;
    1453          473 :         state->cur_line_ptr = line_end + 1;
    1454          473 :     }
    1455              :     state->cur_line_ptr += strlen(state->cur_line_ptr);
    1456         1737 : 
    1457              :     /* Report current location. */
    1458              :     *lineno = state->cur_line_no;
    1459         1737 :     *offset = state->cur_line_ptr - state->scanbuf;
    1460         1737 : }
    1461              : 
    1462              : /*
    1463              :  * Push the given string onto the stack of stuff to scan.
    1464              :  *
    1465              :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1466              :  */
    1467              : void
    1468              : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
    1469          883 :                          const char *varname)
    1470              : {
    1471              :     StackElem  *stackelem;
    1472              : 
    1473              :     stackelem = pg_malloc_object(StackElem);
    1474          883 : 
    1475              :     /*
    1476              :      * In current usage, the passed varname points at the current flex input
    1477              :      * buffer; we must copy it before calling psqlscan_prepare_buffer()
    1478              :      * because that will change the buffer state.
    1479              :      */
    1480              :     stackelem->varname = varname ? pg_strdup(varname) : NULL;
    1481          883 : 
    1482              :     stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
    1483          883 :                                              &stackelem->bufstring);
    1484              :     state->curline = stackelem->bufstring;
    1485          883 :     if (state->safe_encoding)
    1486          883 :     {
    1487              :         stackelem->origstring = NULL;
    1488          883 :         state->refline = stackelem->bufstring;
    1489          883 :     }
    1490              :     else
    1491              :     {
    1492              :         stackelem->origstring = pg_strdup(newstr);
    1493            0 :         state->refline = stackelem->origstring;
    1494            0 :     }
    1495              :     stackelem->next = state->buffer_stack;
    1496          883 :     state->buffer_stack = stackelem;
    1497          883 : }
    1498          883 : 
    1499              : /*
    1500              :  * Pop the topmost buffer stack item (there must be one!)
    1501              :  *
    1502              :  * NB: after this, the flex input state is unspecified; caller must
    1503              :  * switch to an appropriate buffer to continue lexing.
    1504              :  * See psqlscan_select_top_buffer().
    1505              :  */
    1506              : void
    1507              : psqlscan_pop_buffer_stack(PsqlScanState state)
    1508          883 : {
    1509              :     StackElem  *stackelem = state->buffer_stack;
    1510          883 : 
    1511              :     state->buffer_stack = stackelem->next;
    1512          883 :     yy_delete_buffer(stackelem->buf, state->scanner);
    1513          883 :     free(stackelem->bufstring);
    1514          883 :     if (stackelem->origstring)
    1515          883 :         free(stackelem->origstring);
    1516            0 :     if (stackelem->varname)
    1517          883 :         free(stackelem->varname);
    1518          883 :     free(stackelem);
    1519          883 : }
    1520          883 : 
    1521              : /*
    1522              :  * Select the topmost surviving buffer as the active input.
    1523              :  */
    1524              : void
    1525              : psqlscan_select_top_buffer(PsqlScanState state)
    1526          883 : {
    1527              :     StackElem  *stackelem = state->buffer_stack;
    1528          883 : 
    1529              :     if (stackelem != NULL)
    1530          883 :     {
    1531              :         yy_switch_to_buffer(stackelem->buf, state->scanner);
    1532            0 :         state->curline = stackelem->bufstring;
    1533            0 :         state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
    1534            0 :     }
    1535              :     else
    1536              :     {
    1537              :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1538          883 :         state->curline = state->scanbuf;
    1539          883 :         state->refline = state->scanline;
    1540          883 :     }
    1541              : }
    1542          883 : 
    1543              : /*
    1544              :  * Check if specified variable name is the source for any string
    1545              :  * currently being scanned
    1546              :  */
    1547              : bool
    1548              : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
    1549          883 : {
    1550              :     StackElem  *stackelem;
    1551              : 
    1552              :     for (stackelem = state->buffer_stack;
    1553          883 :          stackelem != NULL;
    1554          883 :          stackelem = stackelem->next)
    1555            0 :     {
    1556              :         if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
    1557            0 :             return true;
    1558            0 :     }
    1559              :     return false;
    1560          883 : }
    1561              : 
    1562              : /*
    1563              :  * Set up a flex input buffer to scan the given data.  We always make a
    1564              :  * copy of the data.  If working in an unsafe encoding, the copy has
    1565              :  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
    1566              :  *
    1567              :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1568              :  */
    1569              : YY_BUFFER_STATE
    1570              : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
    1571       522548 :                         char **txtcopy)
    1572              : {
    1573              :     char       *newtxt;
    1574              : 
    1575              :     /* Flex wants two \0 characters after the actual data */
    1576              :     newtxt = pg_malloc_array(char, (len + 2));
    1577       522548 :     *txtcopy = newtxt;
    1578       522548 :     newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
    1579       522548 : 
    1580              :     if (state->safe_encoding)
    1581       522548 :         memcpy(newtxt, txt, len);
    1582       522408 :     else
    1583              :     {
    1584              :         /* Gotta do it the hard way */
    1585              :         int         i = 0;
    1586          140 : 
    1587              :         while (i < len)
    1588          808 :         {
    1589              :             int         thislen = PQmblen(txt + i, state->encoding);
    1590          668 : 
    1591              :             /* first byte should always be okay... */
    1592              :             newtxt[i] = txt[i];
    1593          668 :             i++;
    1594          668 :             while (--thislen > 0 && i < len)
    1595          808 :                 newtxt[i++] = (char) 0xFF;
    1596          140 :         }
    1597              :     }
    1598              : 
    1599              :     return yy_scan_buffer(newtxt, len + 2, state->scanner);
    1600       522548 : }
    1601              : 
    1602              : /*
    1603              :  * psqlscan_emit() --- body for ECHO macro
    1604              :  *
    1605              :  * NB: this must be used for ALL and ONLY the text copied from the flex
    1606              :  * input data.  If you pass it something that is not part of the yytext
    1607              :  * string, you are making a mistake.  Internally generated text can be
    1608              :  * appended directly to state->output_buf.
    1609              :  */
    1610              : void
    1611              : psqlscan_emit(PsqlScanState state, const char *txt, int len)
    1612      6602050 : {
    1613              :     PQExpBuffer output_buf = state->output_buf;
    1614      6602050 : 
    1615              :     if (state->safe_encoding)
    1616      6602050 :         appendBinaryPQExpBuffer(output_buf, txt, len);
    1617      6601574 :     else
    1618              :     {
    1619              :         /* Gotta do it the hard way */
    1620              :         const char *reference = state->refline;
    1621          476 :         int         i;
    1622              : 
    1623              :         reference += (txt - state->curline);
    1624          476 : 
    1625              :         for (i = 0; i < len; i++)
    1626         1277 :         {
    1627              :             char        ch = txt[i];
    1628          801 : 
    1629              :             if (ch == (char) 0xFF)
    1630          801 :                 ch = reference[i];
    1631          140 :             appendPQExpBufferChar(output_buf, ch);
    1632          801 :         }
    1633              :     }
    1634              : }
    1635      6602050 : 
    1636              : /*
    1637              :  * psqlscan_extract_substring --- fetch value of (part of) the current token
    1638              :  *
    1639              :  * This is like psqlscan_emit(), except that the data is returned as a
    1640              :  * malloc'd string rather than being pushed directly to state->output_buf.
    1641              :  */
    1642              : char *
    1643              : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
    1644         3391 : {
    1645              :     char       *result = pg_malloc_array(char, (len + 1));
    1646         3391 : 
    1647              :     if (state->safe_encoding)
    1648         3391 :         memcpy(result, txt, len);
    1649         3391 :     else
    1650              :     {
    1651              :         /* Gotta do it the hard way */
    1652              :         const char *reference = state->refline;
    1653            0 :         int         i;
    1654              : 
    1655              :         reference += (txt - state->curline);
    1656            0 : 
    1657              :         for (i = 0; i < len; i++)
    1658            0 :         {
    1659              :             char        ch = txt[i];
    1660            0 : 
    1661              :             if (ch == (char) 0xFF)
    1662            0 :                 ch = reference[i];
    1663            0 :             result[i] = ch;
    1664            0 :         }
    1665              :     }
    1666              :     result[len] = '\0';
    1667         3391 :     return result;
    1668         3391 : }
    1669              : 
    1670              : /*
    1671              :  * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
    1672              :  *
    1673              :  * If the variable name is found, escape its value using the appropriate
    1674              :  * quoting method and emit the value to output_buf.  (Since the result is
    1675              :  * surely quoted, there is never any reason to rescan it.)  If we don't
    1676              :  * find the variable or escaping fails, emit the token as-is.
    1677              :  */
    1678              : void
    1679              : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
    1680          721 :                          PsqlScanQuoteType quote)
    1681              : {
    1682              :     char       *varname;
    1683              :     char       *value;
    1684              : 
    1685              :     /* Variable lookup. */
    1686              :     varname = psqlscan_extract_substring(state, txt + 2, len - 3);
    1687          721 :     if (state->callbacks->get_variable)
    1688          721 :         value = state->callbacks->get_variable(varname, quote,
    1689          721 :                                                state->cb_passthrough);
    1690              :     else
    1691              :         value = NULL;
    1692            0 :     free(varname);
    1693          721 : 
    1694              :     if (value)
    1695          721 :     {
    1696              :         /* Emit the suitably-escaped value */
    1697              :         appendPQExpBufferStr(state->output_buf, value);
    1698          684 :         free(value);
    1699          684 :     }
    1700              :     else
    1701              :     {
    1702              :         /* Emit original token as-is */
    1703              :         psqlscan_emit(state, txt, len);
    1704           37 :     }
    1705              : }
    1706          721 : 
    1707              : void
    1708              : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
    1709           21 : {
    1710              :     char       *varname;
    1711              :     char       *value;
    1712              : 
    1713              :     varname = psqlscan_extract_substring(state, txt + 3, len - 4);
    1714           21 :     if (state->callbacks->get_variable)
    1715           21 :         value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
    1716           21 :                                                state->cb_passthrough);
    1717              :     else
    1718              :         value = NULL;
    1719            0 :     free(varname);
    1720           21 : 
    1721              :     if (value != NULL)
    1722           21 :     {
    1723              :         appendPQExpBufferStr(state->output_buf, "TRUE");
    1724            9 :         free(value);
    1725            9 :     }
    1726              :     else
    1727              :     {
    1728              :         appendPQExpBufferStr(state->output_buf, "FALSE");
    1729           12 :     }
    1730              : }
    1731           21 : /* END: function "psqlscan_test_variable" */
        

Generated by: LCOV version 2.0-1