LCOV - code coverage report
Current view: top level - src/fe_utils - psqlscan.l (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 82.9 % 574 476
Test Date: 2026-04-03 11:15:52 Functions: 100.0 % 19 19
Legend: Lines:     hit not hit

            Line data    Source code
       1              : %top{
       2              : /*-------------------------------------------------------------------------
       3              :  *
       4              :  * psqlscan.l
       5              :  *    lexical scanner for SQL commands
       6              :  *
       7              :  * This lexer used to be part of psql, and that heritage is reflected in
       8              :  * the file name as well as function and typedef names, though it can now
       9              :  * be used by other frontend programs as well.  It's also possible to extend
      10              :  * this lexer with a compatible add-on lexer to handle program-specific
      11              :  * backslash commands.
      12              :  *
      13              :  * This code is mainly concerned with determining where the end of a SQL
      14              :  * statement is: we are looking for semicolons that are not within quotes,
      15              :  * comments, or parentheses.  The most reliable way to handle this is to
      16              :  * borrow the backend's flex lexer rules, lock, stock, and barrel.  The rules
      17              :  * below are (except for a few) the same as the backend's, but their actions
      18              :  * are just ECHO whereas the backend's actions generally do other things.
      19              :  *
      20              :  * XXX The rules in this file must be kept in sync with the backend lexer!!!
      21              :  *
      22              :  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
      23              :  *
      24              :  * See psqlscan_int.h for additional commentary.
      25              :  *
      26              :  *
      27              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      28              :  * Portions Copyright (c) 1994, Regents of the University of California
      29              :  *
      30              :  * IDENTIFICATION
      31              :  *    src/fe_utils/psqlscan.l
      32              :  *
      33              :  *-------------------------------------------------------------------------
      34              :  */
      35              : #include "postgres_fe.h"
      36              : 
      37              : #include "common/logging.h"
      38              : #include "fe_utils/psqlscan.h"
      39              : 
      40              : #include "libpq-fe.h"
      41              : }
      42              : 
      43              : %{
      44              : 
      45              : /* LCOV_EXCL_START */
      46              : 
      47              : #include "fe_utils/psqlscan_int.h"
      48              : 
      49              : /*
      50              :  * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
      51              :  * doesn't presently make use of that argument, so just declare it as int.
      52              :  */
      53              : typedef int YYSTYPE;
      54              : 
      55              : 
      56              : /* Return values from yylex() */
      57              : #define LEXRES_EOL          0   /* end of input */
      58              : #define LEXRES_SEMI         1   /* command-terminating semicolon found */
      59              : #define LEXRES_BACKSLASH    2   /* backslash command start */
      60              : /* Replace flex's default ECHO: hand the matched text (yytext, yyleng) to */
      61              : /* psqlscan_emit() together with cur_state, the local declared in yylex(). */
      62              : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
      63              : 
      64              : %}
      65              : 
      66              : %option reentrant
      67              : %option bison-bridge
      68              : %option 8bit
      69              : %option never-interactive
      70              : %option nodefault
      71              : %option noinput
      72              : %option nounput
      73              : %option noyywrap
      74              : %option warn
      75              : %option prefix="psql_yy"
      76              : 
      77              : /*
      78              :  * Set the type of yyextra; we use it as a pointer back to the containing
      79              :  * PsqlScanState.
      80              :  */
      81              : %option extra-type="PsqlScanState"
      82              : 
      83              : /*
      84              :  * All of the following definitions and rules should exactly match
      85              :  * src/backend/parser/scan.l so far as the flex patterns are concerned.
      86              :  * The rule bodies are just ECHO as opposed to what the backend does,
      87              :  * however.  (But be sure to duplicate code that affects the lexing process,
      88              :  * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
      89              :  * whereas scan.l has a separate one for each exclusive state.
      90              :  */
      91              : 
      92              : /*
      93              :  * OK, here is a short description of lex/flex rules behavior.
      94              :  * The longest pattern which matches an input string is always chosen.
      95              :  * For equal-length patterns, the first occurring in the rules list is chosen.
      96              :  * INITIAL is the starting state, to which all non-conditional rules apply.
      97              :  * Exclusive states change parsing rules while the state is active.  When in
      98              :  * an exclusive state, only those rules defined for that state apply.
      99              :  *
     100              :  * We use exclusive states for quoted strings, extended comments,
     101              :  * and to eliminate parsing troubles for numeric strings.
     102              :  * Exclusive states:
     103              :  *  <xb> bit string literal
     104              :  *  <xc> extended C-style comments
     105              :  *  <xd> delimited identifiers (double-quoted identifiers)
     106              :  *  <xh> hexadecimal byte string
     107              :  *  <xq> standard quoted strings
     108              :  *  <xqs> quote stop (detect continued strings)
     109              :  *  <xe> extended quoted strings (support backslash escape sequences)
     110              :  *  <xdolq> $foo$ quoted strings
     111              :  *  <xui> quoted identifier with Unicode escapes
     112              :  *  <xus> quoted string with Unicode escapes
     113              :  *
     114              :  * Note: we intentionally don't mimic the backend's <xeu> state; we have
     115              :  * no need to distinguish it from <xe> state, and no good way to get out
     116              :  * of it in error cases.  The backend just throws yyerror() in those
     117              :  * cases, but that's not an option here.
     118              :  */
     119              : 
     120              : %x xb
     121              : %x xc
     122              : %x xd
     123              : %x xh
     124              : %x xq
     125              : %x xqs
     126              : %x xe
     127              : %x xdolq
     128              : %x xui
     129              : %x xus
     130              : 
     131              : /*
     132              :  * In order to make the world safe for Windows and Mac clients as well as
     133              :  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
     134              :  * sequence will be seen as two successive newlines, but that doesn't cause
     135              :  * any problems.  Comments that start with -- and extend to the next
     136              :  * newline are treated as equivalent to a single whitespace character.
     137              :  *
     138              :  * NOTE a fine point: if there is no newline following --, we will absorb
     139              :  * everything to the end of the input as a comment.  This is correct.  Older
     140              :  * versions of Postgres failed to recognize -- as a comment if the input
     141              :  * did not end with a newline.
     142              :  *
     143              :  * non_newline_space tracks all space characters except newlines.
     144              :  *
     145              :  * XXX if you change the set of whitespace characters, fix scanner_isspace()
     146              :  * to agree.
     147              :  */
     148              : 
     149              : space               [ \t\n\r\f\v]
     150              : non_newline_space   [ \t\f\v]
     151              : newline             [\n\r]
     152              : non_newline         [^\n\r]
     153              : 
     154              : comment         ("--"{non_newline}*)
     155              : 
     156              : whitespace      ({space}+|{comment})
     157              : 
     158              : /*
     159              :  * SQL requires at least one newline in the whitespace separating
     160              :  * string literals that are to be concatenated.  Silly, but who are we
     161              :  * to argue?  Note that {whitespace_with_newline} should not have * after
     162              :  * it, whereas {whitespace} should generally have a * after it...
     163              :  */
     164              : 
     165              : special_whitespace      ({space}+|{comment}{newline})
     166              : non_newline_whitespace  ({non_newline_space}|{comment})
     167              : whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
     168              : 
     169              : quote           '
     170              : /* If we see {quote} then {quotecontinue}, the quoted string continues */
     171              : quotecontinue   {whitespace_with_newline}{quote}
     172              : 
     173              : /*
     174              :  * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
     175              :  * {quotecontinue}.  It might seem that this could just be {whitespace}*,
     176              :  * but if there's a dash after {whitespace_with_newline}, it must be consumed
     177              :  * to see if there's another dash --- which would start a {comment} and thus
     178              :  * allow continuation of the {quotecontinue} token.
     179              :  */
     180              : quotecontinuefail   {whitespace}*"-"?
     181              : 
     182              : /* Bit string
     183              :  * It is tempting to scan the string for only those characters
     184              :  * which are allowed. However, this leads to silently swallowed
     185              :  * characters if illegal characters are included in the string.
     186              :  * For example, if xbinside is [01] then B'ABCD' is interpreted
     187              :  * as a zero-length string, and the ABCD' is lost!
     188              :  * Better to pass the string forward and let the input routines
     189              :  * validate the contents.
     190              :  */
     191              : xbstart         [bB]{quote}
     192              : xbinside        [^']*
     193              : 
     194              : /* Hexadecimal byte string */
     195              : xhstart         [xX]{quote}
     196              : xhinside        [^']*
     197              : 
     198              : /* National character */
     199              : xnstart         [nN]{quote}
     200              : 
     201              : /* Quoted string that allows backslash escapes */
     202              : xestart         [eE]{quote}
     203              : xeinside        [^\\']+
     204              : xeescape        [\\][^0-7]
     205              : xeoctesc        [\\][0-7]{1,3}
     206              : xehexesc        [\\]x[0-9A-Fa-f]{1,2}
     207              : xeunicode       [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
     208              : xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
     209              : 
     210              : /* Extended quote
     211              :  * xqdouble implements embedded quote, ''''
     212              :  */
     213              : xqstart         {quote}
     214              : xqdouble        {quote}{quote}
     215              : xqinside        [^']+
     216              : 
     217              : /* $foo$ style quotes ("dollar quoting")
     218              :  * The quoted string starts with $foo$ where "foo" is an optional string
     219              :  * in the form of an identifier, except that it may not contain "$",
     220              :  * and extends to the first occurrence of an identical string.
     221              :  * There is *no* processing of the quoted text.
     222              :  *
     223              :  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
     224              :  * fails to match its trailing "$".
     225              :  */
     226              : dolq_start      [A-Za-z\200-\377_]
     227              : dolq_cont       [A-Za-z\200-\377_0-9]
     228              : dolqdelim       \$({dolq_start}{dolq_cont}*)?\$
     229              : dolqfailed      \${dolq_start}{dolq_cont}*
     230              : dolqinside      [^$]+
     231              : 
     232              : /* Double quote
     233              :  * Allows embedded spaces and other special characters into identifiers.
     234              :  */
     235              : dquote          \"
     236              : xdstart         {dquote}
     237              : xdstop          {dquote}
     238              : xddouble        {dquote}{dquote}
     239              : xdinside        [^"]+
     240              : 
     241              : /* Quoted identifier with Unicode escapes */
     242              : xuistart        [uU]&{dquote}
     243              : 
     244              : /* Quoted string with Unicode escapes */
     245              : xusstart        [uU]&{quote}
     246              : 
     247              : /* error rule to avoid backup */
     248              : xufailed        [uU]&
     249              : 
     250              : 
     251              : /* C-style comments
     252              :  *
     253              :  * The "extended comment" syntax closely resembles allowable operator syntax.
     254              :  * The tricky part here is to get lex to recognize a string starting with
     255              :  * slash-star as a comment, when interpreting it as an operator would produce
     256              :  * a longer match --- remember lex will prefer a longer match!  Also, if we
     257              :  * have something like plus-slash-star, lex will think this is a 3-character
     258              :  * operator whereas we want to see it as a + operator and a comment start.
     259              :  * The solution is two-fold:
     260              :  * 1. append {op_chars}* to xcstart so that it matches as much text as
     261              :  *    {operator} would. Then the tie-breaker (first matching rule of same
     262              :  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
     263              :  *    in case it contains a star-slash that should terminate the comment.
     264              :  * 2. In the operator rule, check for slash-star within the operator, and
     265              :  *    if found throw it back with yyless().  This handles the plus-slash-star
     266              :  *    problem.
     267              :  * Dash-dash comments have similar interactions with the operator rule.
     268              :  */
     269              : xcstart         \/\*{op_chars}*
     270              : xcstop          \*+\/
     271              : xcinside        [^*/]+
     272              : 
     273              : ident_start     [A-Za-z\200-\377_]
     274              : ident_cont      [A-Za-z\200-\377_0-9\$]
     275              : 
     276              : identifier      {ident_start}{ident_cont}*
     277              : 
     278              : /* Assorted special-case operators and operator-like tokens */
     279              : typecast        "::"
     280              : dot_dot         \.\.
     281              : colon_equals    ":="
     282              : 
     283              : /*
     284              :  * These operator-like tokens (unlike the above ones) also match the {operator}
     285              :  * rule, which means that they might be overridden by a longer match if they
     286              :  * are followed by a comment start or a + or - character. Accordingly, if you
     287              :  * add to this list, you must also add corresponding code to the {operator}
     288              :  * block to return the correct token in such cases. (This is not needed here in
     289              :  * psqlscan.l, since this lexer ignores the token value.)
     290              :  */
     291              : equals_greater  "=>"
     292              : less_equals     "<="
     293              : greater_equals  ">="
     294              : less_greater    "<>"
     295              : not_equals      "!="
     296              : /* Note there is no need for left_arrow, since "<-" is not a single operator. */
     297              : right_arrow     "->"
     298              : 
     299              : /*
     300              :  * "self" is the set of chars that should be returned as single-character
     301              :  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
     302              :  * which can be one or more characters long (but if a single-char token
     303              :  * appears in the "self" set, it is not to be returned as an Op).  Note
     304              :  * that the sets overlap, but each has some chars that are not in the other.
     305              :  *
     306              :  * If you change either set, adjust the character lists appearing in the
     307              :  * rule for "operator"!
     308              :  */
     309              : self            [,()\[\].;\:\|\+\-\*\/\%\^\<\>\=]
     310              : op_chars        [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
     311              : operator        {op_chars}+
     312              : 
     313              : /*
     314              :  * Numbers
     315              :  *
     316              :  * Unary minus is not part of a number here.  Instead we pass it separately to
     317              :  * the parser, and there it gets coerced via doNegate().
     318              :  *
     319              :  * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
     320              :  *
     321              :  * {realfail} is added to prevent the need for scanner
     322              :  * backup when the {real} rule fails to match completely.
     323              :  */
     324              : decdigit        [0-9]
     325              : hexdigit        [0-9A-Fa-f]
     326              : octdigit        [0-7]
     327              : bindigit        [0-1]
     328              : 
     329              : decinteger      {decdigit}(_?{decdigit})*
     330              : hexinteger      0[xX](_?{hexdigit})+
     331              : octinteger      0[oO](_?{octdigit})+
     332              : bininteger      0[bB](_?{bindigit})+
     333              : 
     334              : hexfail         0[xX]_?
     335              : octfail         0[oO]_?
     336              : binfail         0[bB]_?
     337              : 
     338              : numeric         (({decinteger}\.{decinteger}?)|(\.{decinteger}))
     339              : numericfail     {decinteger}\.\.
     340              : 
     341              : real            ({decinteger}|{numeric})[Ee][-+]?{decinteger}
     342              : realfail        ({decinteger}|{numeric})[Ee][-+]
     343              : 
     344              : /* Positional parameters don't accept underscores. */
     345              : param           \${decdigit}+
     346              : 
     347              : /*
     348              :  * An identifier immediately following an integer literal is disallowed because
     349              :  * in some cases it's ambiguous what is meant: for example, 0x1234 could be
     350              :  * either a hexinteger or a decinteger "0" and an identifier "x1234".  We can
     351              :  * detect such problems by seeing if integer_junk matches a longer substring
     352              :  * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
     353              :  * bininteger).  One "junk" pattern is sufficient because
     354              :  * {decinteger}{identifier} will match all the same strings we'd match with
     355              :  * {hexinteger}{identifier} etc.
     356              :  *
     357              :  * Note that the rule for integer_junk must appear after the ones for
     358              :  * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
     359              :  * and integer_junk, and we need hexinteger to be chosen in that case.
     360              :  *
     361              :  * Also disallow strings matched by numeric_junk, real_junk and param_junk
     362              :  * for consistency.
     363              :  */
     364              : integer_junk    {decinteger}{identifier}
     365              : numeric_junk    {numeric}{identifier}
     366              : real_junk       {real}{identifier}
     367              : param_junk      \${decdigit}+{identifier}
     368              : 
     369              : /* psql-specific: characters allowed in variable names */
     370              : variable_char   [A-Za-z\200-\377_0-9]
     371              : /* Catch-all: any single character other than newline */
     372              : other           .
     373              : 
     374              : /*
     375              :  * Dollar quoted strings are totally opaque, and no escaping is done on them.
     376              :  * Other quoted strings must allow some special characters such as single-quote
     377              :  *  and newline.
     378              :  * Embedded single-quotes are implemented both in the SQL standard
     379              :  *  style of two adjacent single quotes "''" and in the Postgres/Java style
     380              :  *  of escaped-quote "\'".
     381              :  * Other embedded escaped characters are matched explicitly; unlike the backend,
     382              :  *  this lexer keeps the leading backslash, since every rule just echoes its text.
     383              :  * Note that xcstart must appear before operator, as explained above!
     384              :  *  Also whitespace (comment) must appear before operator.
     385              :  */
     386              : 
     387              : %%
     388              : 
     389              : %{
     390              :         /* Locals declared inside yylex(): the scan state (from yyextra) and its output buffer */
     391              :         PsqlScanState cur_state = yyextra;
     392       787457 :         PQExpBuffer output_buf = cur_state->output_buf;
     393       787457 :         /* cur_state is what the ECHO macro passes on; output_buf is tested by the whitespace rule. */
     394              :         /*
     395              :          * Force flex into the state indicated by start_state.  This has a
     396              :          * couple of purposes: it lets some of the functions below set a new
     397              :          * starting state without ugly direct access to flex variables, and it
     398              :          * allows us to transition from one flex lexer to another so that we
     399              :          * can lex different parts of the source string using separate lexers.
     400              :          */
     401              :         BEGIN(cur_state->start_state);
     402       787457 : %}
     403              : 
     404              : {whitespace}    {
     405              :                     /*
     406              :                      * Note that the whitespace rule includes both true
     407              :                      * whitespace and single-line ("--" style) comments.
     408              :                      * We suppress whitespace until we have collected some
     409              :                      * non-whitespace data.  (This interacts with some
     410              :                      * decisions in MainLoop(); see there for details.)
     411              :                      */
     412              :                     if (output_buf->len > 0)
     413      1869393 :                         ECHO;   /* otherwise nothing is collected yet: the match is dropped */
     414      1758265 :                 }
     415              : 
     416      1869393 : {xcstart}       {
     417          447 :                     cur_state->xcdepth = 0;   /* outermost comment: nesting depth starts at zero */
     418          447 :                     BEGIN(xc);
     419          447 :                     /* Put back any characters past slash-star; see above */
     420              :                     yyless(2);
     421          447 :                     ECHO;
     422          447 :                 }
     423              : 
     424          447 : <xc>{
     425              : {xcstart}       {
     426           12 :                     cur_state->xcdepth++;   /* slash-star inside a comment opens a nested comment */
     427           12 :                     /* Put back any characters past slash-star; see above */
     428              :                     yyless(2);
     429           12 :                     ECHO;
     430           12 :                 }
     431              : 
     432           12 : {xcstop}        {
     433          459 :                     if (cur_state->xcdepth <= 0)
     434          459 :                         BEGIN(INITIAL);   /* outermost comment closed: back to normal scanning */
     435          447 :                     else
     436              :                         cur_state->xcdepth--;   /* closed a nested comment; still inside the outer one */
     437           12 :                     ECHO;
     438          459 :                 }
     439              : 
     440          459 : {xcinside}      {
     441         1024 :                     ECHO;
     442         1024 :                 }
     443              : 
     444         1024 : {op_chars}      {   /* in practice a lone '*' or '/': {xcinside}, listed first, takes every other char */
     445          275 :                     ECHO;
     446          275 :                 }
     447              : 
     448          275 : \*+             {   /* run of stars not followed by '/'; {xcstop} would have been the longer match */
     449            0 :                     ECHO;
     450            0 :                 }
     451              : } /* <xc> */
     452            0 : 
     453              : {xbstart}       {
     454          500 :                     BEGIN(xb);
     455          500 :                     ECHO;
     456          500 :                 }
     457              : <xh>{xhinside}    |
     458          500 : <xb>{xbinside}    {
     459         2695 :                     ECHO;
     460         2695 :                 }
     461              : 
     462         2695 : {xhstart}       {
     463         2215 :                     /* Hexadecimal bit type.
     464              :                      * The comment inherited from the backend rule spoke
     465              :                      * of placing a leading "x" on the string to mark it
     466              :                      * as hex for the input routine.  Nothing like that
     467              :                      * happens here: we just enter <xh> and echo the text.
     468              :                      */
     469              :                     BEGIN(xh);
     470         2215 :                     ECHO;
     471         2215 :                 }
     472              : 
     473         2215 : {xnstart}       {
     474            0 :                     yyless(1);  /* eat only 'n' this time; the quote is put back and rescanned */
     475            0 :                     ECHO;
     476            0 :                 }
     477              : 
     478            0 : {xqstart}       {
     479       157333 :                     if (cur_state->std_strings)
     480       157333 :                         BEGIN(xq);   /* standard quoted string */
     481       157333 :                     else
     482              :                         BEGIN(xe);   /* extended string: backslash escape sequences are recognized */
     483            0 :                     ECHO;
     484       157333 :                 }
     485              : {xestart}       {
     486       157333 :                     BEGIN(xe);   /* explicit e'...' is always scanned as an extended string */
     487          868 :                     ECHO;
     488          868 :                 }
     489              : {xusstart}      {
     490          868 :                     BEGIN(xus);   /* u&'...' quoted string with Unicode escapes */
     491          368 :                     ECHO;
     492          368 :                 }
     493              : 
     494          368 : <xb,xh,xq,xe,xus>{quote} {
     495       161284 :                     /*
     496              :                      * When we are scanning a quoted string and see an end
     497              :                      * quote, we must look ahead for a possible continuation.
     498              :                      * If we don't see one, we know the end quote was in fact
     499              :                      * the end of the string.  To reduce the lexer table size,
     500              :                      * we use a single "xqs" state to do the lookahead for all
     501              :                      * types of strings.
     502              :                      */
     503              :                     cur_state->state_before_str_stop = YYSTATE;   /* lets a continuation resume this string state */
     504       161284 :                     BEGIN(xqs);
     505       161284 :                     ECHO;
     506       161284 :                 }
     507              : <xqs>{quotecontinue} {
     508       161284 :                     /*
     509            0 :                      * Found a quote continuation, so return to the in-quote
     510              :                      * state and continue scanning the literal.  Nothing is
     511              :                      * added to the literal's contents.
     512              :                      */
     513              :                     BEGIN(cur_state->state_before_str_stop);
     514            0 :                     ECHO;
     515            0 :                 }
     516              : <xqs>{quotecontinuefail} |
     517            0 : <xqs>{other}  {
     518       160472 :                     /*
     519              :                      * Failed to see a quote continuation.  Throw back
     520              :                      * everything after the end quote and resume normal
     521              :                      * scanning; the end quote itself was already echoed.
     522              :                      */
     523              :                     yyless(0);
     524       160472 :                     BEGIN(INITIAL);
     525       160472 :                     /* There's nothing to echo ... */
     526              :                 }
     527              : 
     528       160472 : <xq,xe,xus>{xqdouble} {   /* two adjacent quotes: an embedded quote, not the end of the string */
     529         4130 :                     ECHO;
     530         4130 :                 }
     531              : <xq,xus>{xqinside}  {   /* run of characters containing no quote */
     532         4130 :                     ECHO;
     533       165002 :                 }
     534              : <xe>{xeinside}  {   /* run of characters containing neither quote nor backslash */
     535       165002 :                     ECHO;
     536         1659 :                 }
     537              : <xe>{xeunicode} {   /* complete \uXXXX or \UXXXXXXXX escape */
     538         1659 :                     ECHO;
     539          124 :                 }
     540              : <xe>{xeunicodefail}   {   /* \u or \U with too few hex digits: still just echoed */
     541          124 :                     ECHO;
     542            8 :                 }
     543              : <xe>{xeescape}  {   /* backslash plus one character that is not an octal digit */
     544            8 :                     ECHO;
     545          959 :                 }
     546              : <xe>{xeoctesc}  {   /* backslash plus one to three octal digits */
     547          959 :                     ECHO;
     548           14 :                 }
     549              : <xe>{xehexesc}  {   /* \x plus one or two hex digits */
     550           14 :                     ECHO;
     551            6 :                 }
     552              : <xe>.         {
     553            6 :                     /* This is only needed for \ just before EOF */
     554            0 :                     ECHO;
     555            0 :                 }
     556              : 
     557            0 : {dolqdelim}     {
     558         4537 :                     cur_state->dolqstart = pg_strdup(yytext);
     559         4537 :                     BEGIN(xdolq);
     560         4537 :                     ECHO;
     561         4537 :                 }
     562              : {dolqfailed}    {
     563         4537 :                     /* throw back all but the initial "$" */
     564            0 :                     yyless(1);
     565            0 :                     ECHO;
     566            0 :                 }
     567              : <xdolq>{dolqdelim} {
     568            0 :                     if (strcmp(yytext, cur_state->dolqstart) == 0)
     569         4753 :                     {
     570              :                         free(cur_state->dolqstart);
     571         4537 :                         cur_state->dolqstart = NULL;
     572         4537 :                         BEGIN(INITIAL);
     573         4537 :                     }
     574              :                     else
     575              :                     {
     576              :                         /*
     577              :                          * When we fail to match $...$ to dolqstart, transfer
     578              :                          * the $... part to the output, but put back the final
     579              :                          * $ for rescanning.  Consider $delim$...$junk$delim$
     580              :                          */
     581              :                         yyless(yyleng - 1);
     582          216 :                     }
     583              :                     ECHO;
     584         4753 :                 }
     585              : <xdolq>{dolqinside} {
     586         4753 :                     ECHO;
     587        23961 :                 }
     588              : <xdolq>{dolqfailed} {
     589        23961 :                     ECHO;
     590          574 :                 }
     591              : <xdolq>.      {
     592          574 :                     /* This is only needed for $ inside the quoted text */
     593         1588 :                     ECHO;
     594         1588 :                 }
     595              : 
     596         1588 : {xdstart}       {
     597         6528 :                     BEGIN(xd);
     598         6528 :                     ECHO;
     599         6528 :                 }
     600              : {xuistart}      {
     601         6528 :                     BEGIN(xui);
     602           16 :                     ECHO;
     603           16 :                 }
     604              : <xd>{xdstop}  {
     605           16 :                     BEGIN(INITIAL);
     606         6528 :                     ECHO;
     607         6528 :                 }
     608              : <xui>{dquote} {
     609         6528 :                     BEGIN(INITIAL);
     610           16 :                     ECHO;
     611           16 :                 }
     612              : <xd,xui>{xddouble}    {
     613           16 :                     ECHO;
     614           67 :                 }
     615              : <xd,xui>{xdinside}    {
     616           67 :                     ECHO;
     617         6605 :                 }
     618              : 
     619         6605 : {xufailed}  {
     620            0 :                     /* throw back all but the initial u/U */
     621              :                     yyless(1);
     622            0 :                     ECHO;
     623            0 :                 }
     624              : 
     625            0 : {typecast}      {
     626        35975 :                     ECHO;
     627        35975 :                 }
     628              : 
     629        35975 : {dot_dot}       {
     630            0 :                     ECHO;
     631            0 :                 }
     632              : 
     633            0 : {colon_equals}  {
     634         1673 :                     ECHO;
     635         1673 :                 }
     636              : 
     637         1673 : {equals_greater} {
     638         1361 :                     ECHO;
     639         1361 :                 }
     640              : 
     641         1361 : {less_equals}   {
     642         1590 :                     ECHO;
     643         1590 :                 }
     644              : 
     645         1590 : {greater_equals} {
     646         4186 :                     ECHO;
     647         4186 :                 }
     648              : 
     649         4186 : {less_greater}  {
     650          893 :                     ECHO;
     651          893 :                 }
     652              : 
     653          893 : {not_equals}    {
     654         1523 :                     ECHO;
     655         1523 :                 }
     656              : 
     657         1523 : {right_arrow}   {
     658          773 :                     ECHO;
     659          773 :                 }
     660              : 
     661          773 :     /*
     662              :      * These rules are specific to psql --- they implement parenthesis
     663              :      * counting and detection of command-ending semicolon.  These must
     664              :      * appear before the {self} rule so that they take precedence over it.
     665              :      */
     666              : 
     667       253327 : "("               {
     668              :                     cur_state->paren_depth++;
     669       253327 :                     ECHO;
     670       253327 :                 }
     671              : 
     672       253327 : ")"               {
     673       253318 :                     if (cur_state->paren_depth > 0)
     674       253318 :                         cur_state->paren_depth--;
     675       253318 :                     ECHO;
     676       253318 :                 }
     677              : 
     678       253318 : ";"               {
     679       241897 :                     ECHO;
     680       241897 :                     if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
     681       241897 :                     {
     682              :                         /* Terminate lexing temporarily */
     683              :                         cur_state->start_state = YY_START;
     684       241739 :                         cur_state->identifier_count = 0;
     685       241739 :                         return LEXRES_SEMI;
     686       241739 :                     }
     687              :                 }
     688              : 
     689          158 :     /*
     690              :      * psql-specific rules to handle backslash commands and variable
     691              :      * substitution.  We want these before {self}, also.
     692              :      */
     693              : 
     694          512 : "\\"[;:]      {
     695              :                     /* Force a semi-colon or colon into the query buffer */
     696              :                     psqlscan_emit(cur_state, yytext + 1, 1);
     697          512 :                     if (yytext[1] == ';')
     698          512 :                         cur_state->identifier_count = 0;
     699          512 :                 }
     700              : 
     701          512 : "\\"          {
     702        32239 :                     /* Terminate lexing temporarily */
     703              :                     cur_state->start_state = YY_START;
     704        32239 :                     return LEXRES_BACKSLASH;
     705        32239 :                 }
     706              : 
     707              : :{variable_char}+   {
     708         1778 :                     /* Possible psql variable substitution */
     709              :                     char       *varname;
     710              :                     char       *value;
     711              : 
     712              :                     varname = psqlscan_extract_substring(cur_state,
     713         1778 :                                                          yytext + 1,
     714         1778 :                                                          yyleng - 1);
     715         1778 :                     if (cur_state->callbacks->get_variable)
     716         1778 :                         value = cur_state->callbacks->get_variable(varname,
     717         1182 :                                                                    PQUOTE_PLAIN,
     718              :                                                                    cur_state->cb_passthrough);
     719              :                     else
     720              :                         value = NULL;
     721          596 : 
     722              :                     if (value)
     723         1778 :                     {
     724              :                         /* It is a variable, check for recursion */
     725              :                         if (psqlscan_var_is_current_source(cur_state, varname))
     726          874 :                         {
     727              :                             /* Recursive expansion --- don't go there */
     728              :                             pg_log_warning("skipping recursive expansion of variable \"%s\"",
     729            0 :                                                               varname);
     730              :                             /* Instead copy the string as is */
     731              :                             ECHO;
     732            0 :                         }
     733              :                         else
     734              :                         {
     735              :                             /* OK, perform substitution */
     736              :                             psqlscan_push_new_buffer(cur_state, value, varname);
     737          874 :                             /* yy_scan_string already made buffer active */
     738              :                         }
     739              :                         free(value);
     740          874 :                     }
     741              :                     else
     742              :                     {
     743              :                         /*
     744              :                          * if the variable doesn't exist we'll copy the string
     745              :                          * as is
     746              :                          */
     747              :                         ECHO;
     748          904 :                     }
     749              : 
     750              :                     free(varname);
     751         1778 :                 }
     752              : 
     753         1778 : :'{variable_char}+' {
     754          612 :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     755          612 :                                              PQUOTE_SQL_LITERAL);
     756              :                 }
     757              : 
     758          612 : :\"{variable_char}+\" {
     759           21 :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     760           21 :                                              PQUOTE_SQL_IDENT);
     761              :                 }
     762              : 
     763           21 : :\{\?{variable_char}+\} {
     764            8 :                     psqlscan_test_variable(cur_state, yytext, yyleng);
     765            8 :                 }
     766              : 
     767            8 :     /*
     768              :      * These rules just avoid the need for scanner backup if one of the
     769              :      * three rules above fails to match completely.
     770              :      */
     771              : 
     772            0 : :'{variable_char}*  {
     773              :                     /* Throw back everything but the colon */
     774              :                     yyless(1);
     775            0 :                     ECHO;
     776            0 :                 }
     777              : 
     778            0 : :\"{variable_char}*    {
     779            0 :                     /* Throw back everything but the colon */
     780              :                     yyless(1);
     781            0 :                     ECHO;
     782            0 :                 }
     783              : 
     784            0 : :\{\?{variable_char}*   {
     785            0 :                     /* Throw back everything but the colon */
     786              :                     yyless(1);
     787            0 :                     ECHO;
     788            0 :                 }
     789              : :\{ {
     790            0 :                     /* Throw back everything but the colon */
     791            0 :                     yyless(1);
     792            0 :                     ECHO;
     793            0 :                 }
     794              : 
     795            0 :     /*
     796              :      * Back to backend-compatible rules.
     797              :      */
     798              : 
     799       440997 : {self}          {
     800              :                     ECHO;
     801       440997 :                 }
     802              : 
     803       440997 : {operator}      {
     804        12735 :                     /*
     805              :                      * Check for embedded slash-star or dash-dash; those
     806              :                      * are comment starts, so operator must stop there.
     807              :                      * Note that slash-star or dash-dash at the first
     808              :                      * character will match a prior rule, not this one.
     809              :                      */
     810              :                     int         nchars = yyleng;
     811        12735 :                     char       *slashstar = strstr(yytext, "/*");
     812        12735 :                     char       *dashdash = strstr(yytext, "--");
     813        12735 : 
     814              :                     if (slashstar && dashdash)
     815        12735 :                     {
     816              :                         /* if both appear, take the first one */
     817              :                         if (slashstar > dashdash)
     818            0 :                             slashstar = dashdash;
     819            0 :                     }
     820              :                     else if (!slashstar)
     821        12735 :                         slashstar = dashdash;
     822        12695 :                     if (slashstar)
     823        12735 :                         nchars = slashstar - yytext;
     824           48 : 
     825              :                     /*
     826              :                      * For SQL compatibility, '+' and '-' cannot be the
     827              :                      * last char of a multi-char operator unless the operator
     828              :                      * contains chars that are not in SQL operators.
     829              :                      * The idea is to lex '=-' as two operators, but not
     830              :                      * to forbid operator names like '?-' that could not be
     831              :                      * sequences of SQL operators.
     832              :                      */
     833              :                     if (nchars > 1 &&
     834        12735 :                         (yytext[nchars - 1] == '+' ||
     835        11706 :                          yytext[nchars - 1] == '-'))
     836        11702 :                     {
     837              :                         int         ic;
     838              : 
     839              :                         for (ic = nchars - 2; ic >= 0; ic--)
     840          385 :                         {
     841              :                             char c = yytext[ic];
     842          326 :                             if (c == '~' || c == '!' || c == '@' ||
     843          326 :                                 c == '#' || c == '^' || c == '&' ||
     844          270 :                                 c == '|' || c == '`' || c == '?' ||
     845          106 :                                 c == '%')
     846              :                                 break;
     847              :                         }
     848              :                         if (ic < 0)
     849          291 :                         {
     850              :                             /*
     851              :                              * didn't find a qualifying character, so remove
     852              :                              * all trailing [+-]
     853              :                              */
     854              :                             do {
     855              :                                 nchars--;
     856           59 :                             } while (nchars > 1 &&
     857           59 :                                  (yytext[nchars - 1] == '+' ||
     858           23 :                                   yytext[nchars - 1] == '-'));
     859           23 :                         }
     860              :                     }
     861              : 
     862              :                     if (nchars < yyleng)
     863        12735 :                     {
     864              :                         /* Strip the unwanted chars from the token */
     865              :                         yyless(nchars);
     866          107 :                     }
     867              :                     ECHO;
     868        12735 :                 }
     869              : 
     870        12735 : {param}         {
     871          779 :                     ECHO;
     872          779 :                 }
     873              : {param_junk}    {
     874          779 :                     ECHO;
     875            8 :                 }
     876              : 
     877            8 : {decinteger}    {
     878       139645 :                     ECHO;
     879       139645 :                 }
     880              : {hexinteger}    {
     881       139645 :                     ECHO;
     882           83 :                 }
     883              : {octinteger}    {
     884           83 :                     ECHO;
     885           40 :                 }
     886              : {bininteger}    {
     887           40 :                     ECHO;
     888           40 :                 }
     889              : {hexfail}       {
     890           40 :                     ECHO;
     891            4 :                 }
     892              : {octfail}       {
     893            4 :                     ECHO;
     894            4 :                 }
     895              : {binfail}       {
     896            4 :                     ECHO;
     897            4 :                 }
     898              : {numeric}       {
     899            4 :                     ECHO;
     900         5274 :                 }
     901              : {numericfail}   {
     902         5274 :                     /* throw back the .., and treat as integer */
     903            0 :                     yyless(yyleng - 2);
     904            0 :                     ECHO;
     905            0 :                 }
     906              : {real}          {
     907            0 :                     ECHO;
     908          418 :                 }
     909              : {realfail}      {
     910          418 :                     ECHO;
     911            4 :                 }
     912              : {integer_junk}  {
     913            4 :                     ECHO;
     914           44 :                 }
     915              : {numeric_junk}  {
     916           44 :                     ECHO;
     917           32 :                 }
     918              : {real_junk}     {
     919           32 :                     ECHO;
     920            0 :                 }
     921              : 
     922            0 : 
     923      1817128 : {identifier}    {
     924              :                     /*
     925              :                      * We need to track if we are inside a BEGIN .. END block
     926              :                      * in a function definition, so that semicolons contained
     927              :                      * therein don't terminate the whole statement.  Short of
     928              :                      * writing a full parser here, the following heuristic
     929              :                      * should work.  First, we track whether the beginning of
     930              :                      * the statement matches CREATE [OR REPLACE]
     931              :                      * {FUNCTION|PROCEDURE}
     932              :                      */
     933              : 
     934              :                     if (cur_state->identifier_count == 0)
     935      1817128 :                         memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
     936       248601 : 
     937              :                     if (pg_strcasecmp(yytext, "create") == 0 ||
     938      3591561 :                         pg_strcasecmp(yytext, "function") == 0 ||
     939      3541782 :                         pg_strcasecmp(yytext, "procedure") == 0 ||
     940      3532349 :                         pg_strcasecmp(yytext, "or") == 0 ||
     941      3525602 :                         pg_strcasecmp(yytext, "replace") == 0)
     942      1760602 :                     {
     943              :                         if (cur_state->identifier_count < sizeof(cur_state->identifiers))
     944        58008 :                             cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);
     945        52226 :                     }
     946              : 
     947              :                     cur_state->identifier_count++;
     948      1817128 : 
     949              :                     if (cur_state->identifiers[0] == 'c' &&
     950      1817128 :                         (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
     951       434695 :                          (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
     952       396703 :                           (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
     953        14887 :                         cur_state->paren_depth == 0)
     954        47294 :                     {
     955              :                         if (pg_strcasecmp(yytext, "begin") == 0)
     956        37815 :                             cur_state->begin_depth++;
     957          110 :                         else if (pg_strcasecmp(yytext, "case") == 0)
     958        37705 :                         {
     959              :                             /*
     960              :                              * CASE also ends with END.  We only need to track
     961              :                              * this if we are already inside a BEGIN.
     962              :                              */
     963              :                             if (cur_state->begin_depth >= 1)
     964            4 :                                 cur_state->begin_depth++;
     965            4 :                         }
     966              :                         else if (pg_strcasecmp(yytext, "end") == 0)
     967        37701 :                         {
     968              :                             if (cur_state->begin_depth > 0)
     969          114 :                                 cur_state->begin_depth--;
     970          114 :                         }
     971              :                     }
     972              : 
     973              :                     ECHO;
     974      1817128 :                 }
     975              : 
     976      1817128 : {other}         {
     977            8 :                     ECHO;
     978            8 :                 }
     979              : 
     980            8 : <<EOF>>         {
     981       514353 :                     if (cur_state->buffer_stack == NULL)
     982       514353 :                     {
     983              :                         cur_state->start_state = YY_START;
     984       513479 :                         return LEXRES_EOL;      /* end of input reached */
     985       513479 :                     }
     986              : 
     987              :                     /*
     988              :                      * We were expanding a variable, so pop the inclusion
     989              :                      * stack and keep lexing
     990              :                      */
     991              :                     psqlscan_pop_buffer_stack(cur_state);
     992          874 :                     psqlscan_select_top_buffer(cur_state);
     993          874 :                 }
     994              : 
     995          874 : %%
     996            0 : 
     997              : /* LCOV_EXCL_STOP */
     998              : 
     999              : /*
    1000              :  * Create a lexer working state struct.
    1001              :  *
    1002              :  * callbacks is a struct of function pointers that encapsulate some
    1003              :  * behavior we need from the surrounding program.  This struct must
    1004              :  * remain valid for the lifespan of the PsqlScanState.
    1005              :  */
    1006              : PsqlScanState
    1007              : psql_scan_create(const PsqlScanCallbacks *callbacks)
    1008        10250 : {
    1009              :     PsqlScanState state;
    1010              : 
    1011              :     state = pg_malloc0_object(PsqlScanStateData);
    1012        10250 : 
    1013              :     state->callbacks = callbacks;
    1014        10250 : 
    1015              :     yylex_init(&state->scanner);
    1016        10250 : 
    1017              :     yyset_extra(state, state->scanner);
    1018        10250 : 
    1019              :     psql_scan_reset(state);
    1020        10250 : 
    1021              :     return state;
    1022        10250 : }
    1023              : 
    1024              : /*
    1025              :  * Destroy a lexer working state struct, releasing all resources.
    1026              :  */
    1027              : void
    1028              : psql_scan_destroy(PsqlScanState state)
    1029        10193 : {
    1030              :     psql_scan_finish(state);
    1031        10193 : 
    1032              :     psql_scan_reset(state);
    1033        10193 : 
    1034              :     yylex_destroy(state->scanner);
    1035        10193 : 
    1036              :     free(state);
    1037        10193 : }
    1038        10193 : 
    1039              : /*
    1040              :  * Set the callback passthrough pointer for the lexer.
    1041              :  *
    1042              :  * This could have been integrated into psql_scan_create, but keeping it
    1043              :  * separate allows the application to change the pointer later, which might
    1044              :  * be useful.
    1045              :  */
    1046              : void
    1047              : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
    1048         9647 : {
    1049              :     state->cb_passthrough = passthrough;
    1050         9647 : }
    1051         9647 : 
    1052              : /*
    1053              :  * Set up to perform lexing of the given input line.
    1054              :  *
    1055              :  * The text at *line, extending for line_len bytes, will be scanned by
    1056              :  * subsequent calls to the psql_scan routines.  psql_scan_finish should
    1057              :  * be called when scanning is complete.  Note that the lexer retains
    1058              :  * a pointer to the storage at *line --- this string must not be altered
    1059              :  * or freed until after psql_scan_finish is called.
    1060              :  *
    1061              :  * encoding is the libpq identifier for the character encoding in use,
    1062              :  * and std_strings says whether standard_conforming_strings is on.
    1063              :  */
    1064              : void
    1065              : psql_scan_setup(PsqlScanState state,
    1066       513794 :                 const char *line, int line_len,
    1067              :                 int encoding, bool std_strings)
    1068              : {
    1069              :     /* Mustn't be scanning already */
    1070              :     Assert(state->scanbufhandle == NULL);
    1071              :     Assert(state->buffer_stack == NULL);
    1072              : 
    1073              :     /* Do we need to hack the character set encoding? */
    1074              :     state->encoding = encoding;
    1075       513794 :     state->safe_encoding = pg_valid_server_encoding_id(encoding);
    1076       513794 : 
    1077              :     /* Save standard-strings flag as well */
    1078              :     state->std_strings = std_strings;
    1079       513794 : 
    1080              :     /* Set up flex input buffer with appropriate translation and padding */
    1081              :     state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
    1082       513794 :                                                    &state->scanbuf);
    1083              :     state->scanline = line;
    1084       513794 : 
    1085              :     /* Set lookaside data in case we have to map unsafe encoding */
    1086              :     state->curline = state->scanbuf;
    1087       513794 :     state->refline = state->scanline;
    1088       513794 : 
    1089              :     /* Initialize state for psql_scan_get_location() */
    1090              :     state->cur_line_no = 0;      /* yylex not called yet */
    1091       513794 :     state->cur_line_ptr = state->scanbuf;
    1092       513794 : }
    1093       513794 : 
    1094              : /*
    1095              :  * Do lexical analysis of SQL command text.
    1096              :  *
    1097              :  * The text previously passed to psql_scan_setup is scanned, and appended
    1098              :  * (possibly with transformation) to query_buf.
    1099              :  *
    1100              :  * The return value indicates the condition that stopped scanning:
    1101              :  *
    1102              :  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
    1103              :  * transferred to query_buf.)  The command accumulated in query_buf should
    1104              :  * be executed, then clear query_buf and call again to scan the remainder
    1105              :  * of the line.
    1106              :  *
    1107              :  * PSCAN_BACKSLASH: found a backslash that starts a special command.
    1108              :  * Any previous data on the line has been transferred to query_buf.
    1109              :  * The caller will typically next apply a separate flex lexer to scan
    1110              :  * the special command.
    1111              :  *
    1112              :  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
    1113              :  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
    1114              :  *
    1115              :  * PSCAN_EOL: the end of the line was reached, and there is no lexical
    1116              :  * reason to consider the command incomplete.  The caller may or may not
    1117              :  * choose to send it.  *prompt is set to the appropriate prompt type if
    1118              :  * the caller chooses to collect more input.
    1119              :  *
    1120              :  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
    1121              :  * be called next, then the cycle may be repeated with a fresh input line.
    1122              :  *
    1123              :  * In all cases, *prompt is set to an appropriate prompt type code for the
    1124              :  * next line-input operation.
    1125              :  */
    1126              : PsqlScanResult
    1127              : psql_scan(PsqlScanState state,
    1128       787457 :           PQExpBuffer query_buf,
    1129              :           promptStatus_t *prompt)
    1130              : {
    1131              :     PsqlScanResult result;
    1132              :     int         lexresult;
    1133              : 
    1134              :     /* Must be scanning already */
    1135              :     Assert(state->scanbufhandle != NULL);
    1136              : 
    1137              :     /* Set current output target */
    1138              :     state->output_buf = query_buf;
    1139       787457 : 
    1140              :     /* Set input source */
    1141              :     if (state->buffer_stack != NULL)
    1142       787457 :         yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
    1143           60 :     else
    1144              :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1145       787397 : 
    1146              :     /* And lex. */
    1147              :     lexresult = yylex(NULL, state->scanner);
    1148       787457 : 
    1149              :     /* Notify psql_scan_get_location() that a yylex call has been made. */
    1150              :     if (state->cur_line_no == 0)
    1151       787457 :         state->cur_line_no = 1;
    1152       513792 : 
    1153              :     /*
    1154              :      * Check termination state and return appropriate result info.
    1155              :      */
    1156              :     switch (lexresult)
    1157       787457 :     {
    1158              :         case LEXRES_EOL:        /* end of input */
    1159       513479 :             switch (state->start_state)
    1160       513479 :             {
    1161              :                 case INITIAL:
    1162       482046 :                 case xqs:       /* we treat this like INITIAL */
    1163              :                     if (state->paren_depth > 0)
    1164       482046 :                     {
    1165              :                         result = PSCAN_INCOMPLETE;
    1166        42303 :                         *prompt = PROMPT_PAREN;
    1167        42303 :                     }
    1168              :                     else if (state->begin_depth > 0)
    1169       439743 :                     {
    1170              :                         result = PSCAN_INCOMPLETE;
    1171          665 :                         *prompt = PROMPT_CONTINUE;
    1172          665 :                     }
    1173              :                     else if (query_buf->len > 0)
    1174       439078 :                     {
    1175              :                         result = PSCAN_EOL;
    1176        93156 :                         *prompt = PROMPT_CONTINUE;
    1177        93156 :                     }
    1178              :                     else
    1179              :                     {
    1180              :                         /* never bother to send an empty buffer */
    1181              :                         result = PSCAN_INCOMPLETE;
    1182       345922 :                         *prompt = PROMPT_READY;
    1183       345922 :                     }
    1184              :                     break;
    1185       482046 :                 case xb:
    1186            0 :                     result = PSCAN_INCOMPLETE;
    1187            0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1188            0 :                     break;
    1189            0 :                 case xc:
    1190          510 :                     result = PSCAN_INCOMPLETE;
    1191          510 :                     *prompt = PROMPT_COMMENT;
    1192          510 :                     break;
    1193          510 :                 case xd:
    1194           23 :                     result = PSCAN_INCOMPLETE;
    1195           23 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1196           23 :                     break;
    1197           23 :                 case xh:
    1198            0 :                     result = PSCAN_INCOMPLETE;
    1199            0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1200            0 :                     break;
    1201            0 :                 case xe:
    1202          301 :                     result = PSCAN_INCOMPLETE;
    1203          301 :                     *prompt = PROMPT_SINGLEQUOTE;
    1204          301 :                     break;
    1205          301 :                 case xq:
    1206         7022 :                     result = PSCAN_INCOMPLETE;
    1207         7022 :                     *prompt = PROMPT_SINGLEQUOTE;
    1208         7022 :                     break;
    1209         7022 :                 case xdolq:
    1210        23577 :                     result = PSCAN_INCOMPLETE;
    1211        23577 :                     *prompt = PROMPT_DOLLARQUOTE;
    1212        23577 :                     break;
    1213        23577 :                 case xui:
    1214            0 :                     result = PSCAN_INCOMPLETE;
    1215            0 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1216            0 :                     break;
    1217            0 :                 case xus:
    1218            0 :                     result = PSCAN_INCOMPLETE;
    1219            0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1220            0 :                     break;
    1221            0 :                 default:
    1222            0 :                     /* can't get here */
    1223              :                     fprintf(stderr, "invalid YY_START\n");
    1224            0 :                     exit(1);
    1225            0 :             }
    1226              :             break;
    1227       513479 :         case LEXRES_SEMI:       /* semicolon */
    1228       241739 :             result = PSCAN_SEMICOLON;
    1229       241739 :             *prompt = PROMPT_READY;
    1230       241739 :             break;
    1231       241739 :         case LEXRES_BACKSLASH:  /* backslash */
    1232        32239 :             result = PSCAN_BACKSLASH;
    1233        32239 :             *prompt = PROMPT_READY;
    1234        32239 :             break;
    1235        32239 :         default:
    1236            0 :             /* can't get here */
    1237              :             fprintf(stderr, "invalid yylex result\n");
    1238            0 :             exit(1);
    1239            0 :     }
    1240              : 
    1241              :     return result;
    1242       787457 : }
    1243              : 
    1244              : /*
    1245              :  * Clean up after scanning a string.  This flushes any unread input and
    1246              :  * releases resources (but not the PsqlScanState itself).  Note however
    1247              :  * that this does not reset the lexer scan state; that can be done by
    1248              :  * psql_scan_reset(), which is an orthogonal operation.
    1249              :  *
    1250              :  * It is legal to call this when not scanning anything (makes it easier
    1251              :  * to deal with error recovery).
    1252              :  */
    1253              : void
    1254              : psql_scan_finish(PsqlScanState state)
    1255       523611 : {
    1256              :     /* Drop any incomplete variable expansions. */
    1257              :     while (state->buffer_stack != NULL)
    1258       523611 :         psqlscan_pop_buffer_stack(state);
    1259            0 : 
    1260              :     /* Done with the outer scan buffer, too */
    1261              :     if (state->scanbufhandle)
    1262       523611 :         yy_delete_buffer(state->scanbufhandle, state->scanner);
    1263       513738 :     state->scanbufhandle = NULL;
    1264       523611 :     if (state->scanbuf)
    1265       523611 :         free(state->scanbuf);
    1266       513738 :     state->scanbuf = NULL;
    1267       523611 : }
    1268       523611 : 
    1269              : /*
    1270              :  * Reset lexer scanning state to start conditions.  This is appropriate
    1271              :  * for executing \r psql commands (or any other time that we discard the
    1272              :  * prior contents of query_buf).  It is not, however, necessary to do this
    1273              :  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
    1274              :  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
    1275              :  * conditions are returned.
    1276              :  *
    1277              :  * Note that this is unrelated to flushing unread input; that task is
    1278              :  * done by psql_scan_finish().
    1279              :  */
    1280              : void
    1281              : psql_scan_reset(PsqlScanState state)
    1282        22542 : {
    1283              :     state->start_state = INITIAL;
    1284        22542 :     state->paren_depth = 0;
    1285        22542 :     state->xcdepth = 0;          /* not really necessary */
    1286        22542 :     if (state->dolqstart)
    1287        22542 :         free(state->dolqstart);
    1288            0 :     state->dolqstart = NULL;
    1289        22542 :     state->identifier_count = 0;
    1290        22542 :     state->begin_depth = 0;
    1291        22542 : }
    1292        22542 : 
    1293              : /*
    1294              :  * Reselect this lexer (psqlscan.l) after using another one.
    1295              :  *
    1296              :  * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
    1297              :  * state, because we'd never switch to another lexer in a different state.
    1298              :  * However, we don't want to reset e.g. paren_depth, so this can't be
    1299              :  * the same as psql_scan_reset().
    1300              :  *
    1301              :  * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
    1302              :  * must be a superset of this.
    1303              :  *
    1304              :  * Note: it seems likely that other lexers could just assign INITIAL for
    1305              :  * themselves, since that probably has the value zero in every flex-generated
    1306              :  * lexer.  But let's not assume that.
    1307              :  */
    1308              : void
    1309              : psql_scan_reselect_sql_lexer(PsqlScanState state)
    1310       153193 : {
    1311              :     state->start_state = INITIAL;
    1312       153193 : }
    1313       153193 : 
    1314              : /*
    1315              :  * Return true if lexer is currently in an "inside quotes" state.
    1316              :  *
    1317              :  * This is pretty grotty but is needed to preserve the old behavior
    1318              :  * that mainloop.c drops blank lines not inside quotes without even
    1319              :  * echoing them.
    1320              :  */
    1321              : bool
    1322              : psql_scan_in_quote(PsqlScanState state)
    1323        99000 : {
    1324              :     return state->start_state != INITIAL &&
    1325        99602 :         state->start_state != xqs;
    1326          602 : }
    1327              : 
    1328              : /*
    1329              :  * Return the current scanning location (end+1 of last scanned token),
    1330              :  * as a line number counted from 1 and an offset from string start.
    1331              :  *
    1332              :  * This considers only the outermost input string, and therefore is of
    1333              :  * limited use for programs that use psqlscan_push_new_buffer().
    1334              :  *
    1335              :  * It would be a bit easier probably to use "%option yylineno" to count
    1336              :  * lines, but the flex manual says that has a performance cost, and only
    1337              :  * a minority of programs using psqlscan have need for this functionality.
    1338              :  * So we implement it ourselves without adding overhead to the lexer itself.
    1339              :  */
    1340              : void
    1341              : psql_scan_get_location(PsqlScanState state,
    1342         1737 :                        int *lineno, int *offset)
    1343              : {
    1344              :     const char *line_end;
    1345              : 
    1346              :     /*
    1347              :      * We rely on flex's having stored a NUL after the current token in
    1348              :      * scanbuf.  Therefore we must specially handle the state before yylex()
    1349              :      * has been called, when obviously that won't have happened yet.
    1350              :      */
    1351              :     if (state->cur_line_no == 0)
    1352         1737 :     {
    1353              :         *lineno = 1;
    1354            0 :         *offset = 0;
    1355            0 :         return;
    1356            0 :     }
    1357              : 
    1358              :     /*
    1359              :      * Advance cur_line_no/cur_line_ptr past whatever has been lexed so far.
    1360              :      * Doing this prevents repeated calls from being O(N^2) for long inputs.
    1361              :      */
    1362              :     while ((line_end = strchr(state->cur_line_ptr, '\n')) != NULL)
    1363         2210 :     {
    1364              :         state->cur_line_no++;
    1365          473 :         state->cur_line_ptr = line_end + 1;
    1366          473 :     }
    1367              :     state->cur_line_ptr += strlen(state->cur_line_ptr);
    1368         1737 : 
    1369              :     /* Report current location. */
    1370              :     *lineno = state->cur_line_no;
    1371         1737 :     *offset = state->cur_line_ptr - state->scanbuf;
    1372         1737 : }
    1373              : 
    1374              : /*
    1375              :  * Push the given string onto the stack of stuff to scan.
    1376              :  *
    1377              :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1378              :  */
    1379              : void
    1380              : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
    1381          874 :                          const char *varname)
    1382              : {
    1383              :     StackElem  *stackelem;
    1384              : 
    1385              :     stackelem = pg_malloc_object(StackElem);
    1386          874 : 
    1387              :     /*
    1388              :      * In current usage, the passed varname points at the current flex input
    1389              :      * buffer; we must copy it before calling psqlscan_prepare_buffer()
    1390              :      * because that will change the buffer state.
    1391              :      */
    1392              :     stackelem->varname = varname ? pg_strdup(varname) : NULL;
    1393          874 : 
    1394              :     stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
    1395          874 :                                              &stackelem->bufstring);
    1396              :     state->curline = stackelem->bufstring;
    1397          874 :     if (state->safe_encoding)
    1398          874 :     {
    1399              :         stackelem->origstring = NULL;
    1400          874 :         state->refline = stackelem->bufstring;
    1401          874 :     }
    1402              :     else
    1403              :     {
    1404              :         stackelem->origstring = pg_strdup(newstr);
    1405            0 :         state->refline = stackelem->origstring;
    1406            0 :     }
    1407              :     stackelem->next = state->buffer_stack;
    1408          874 :     state->buffer_stack = stackelem;
    1409          874 : }
    1410          874 : 
    1411              : /*
    1412              :  * Pop the topmost buffer stack item (there must be one!)
    1413              :  *
    1414              :  * NB: after this, the flex input state is unspecified; caller must
    1415              :  * switch to an appropriate buffer to continue lexing.
    1416              :  * See psqlscan_select_top_buffer().
    1417              :  */
    1418              : void
    1419              : psqlscan_pop_buffer_stack(PsqlScanState state)
    1420          874 : {
    1421              :     StackElem  *stackelem = state->buffer_stack;
    1422          874 : 
    1423              :     state->buffer_stack = stackelem->next;
    1424          874 :     yy_delete_buffer(stackelem->buf, state->scanner);
    1425          874 :     free(stackelem->bufstring);
    1426          874 :     if (stackelem->origstring)
    1427          874 :         free(stackelem->origstring);
    1428            0 :     if (stackelem->varname)
    1429          874 :         free(stackelem->varname);
    1430          874 :     free(stackelem);
    1431          874 : }
    1432          874 : 
    1433              : /*
    1434              :  * Select the topmost surviving buffer as the active input.
    1435              :  */
    1436              : void
    1437              : psqlscan_select_top_buffer(PsqlScanState state)
    1438          874 : {
    1439              :     StackElem  *stackelem = state->buffer_stack;
    1440          874 : 
    1441              :     if (stackelem != NULL)
    1442          874 :     {
    1443              :         yy_switch_to_buffer(stackelem->buf, state->scanner);
    1444            0 :         state->curline = stackelem->bufstring;
    1445            0 :         state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
    1446            0 :     }
    1447              :     else
    1448              :     {
    1449              :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1450          874 :         state->curline = state->scanbuf;
    1451          874 :         state->refline = state->scanline;
    1452          874 :     }
    1453              : }
    1454          874 : 
    1455              : /*
    1456              :  * Check if specified variable name is the source for any string
    1457              :  * currently being scanned
    1458              :  */
    1459              : bool
    1460              : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
    1461          874 : {
    1462              :     StackElem  *stackelem;
    1463              : 
    1464              :     for (stackelem = state->buffer_stack;
    1465          874 :          stackelem != NULL;
    1466          874 :          stackelem = stackelem->next)
    1467            0 :     {
    1468              :         if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
    1469            0 :             return true;
    1470            0 :     }
    1471              :     return false;
    1472          874 : }
    1473              : 
    1474              : /*
    1475              :  * Set up a flex input buffer to scan the given data.  We always make a
    1476              :  * copy of the data.  If working in an unsafe encoding, the copy has
    1477              :  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
    1478              :  *
    1479              :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1480              :  */
    1481              : YY_BUFFER_STATE
    1482              : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
    1483       514668 :                         char **txtcopy)
    1484              : {
    1485              :     char       *newtxt;
    1486              : 
    1487              :     /* Flex wants two \0 characters after the actual data */
    1488              :     newtxt = pg_malloc_array(char, (len + 2));
    1489       514668 :     *txtcopy = newtxt;
    1490       514668 :     newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
    1491       514668 : 
    1492              :     if (state->safe_encoding)
    1493       514668 :         memcpy(newtxt, txt, len);
    1494       514528 :     else
    1495              :     {
    1496              :         /* Gotta do it the hard way */
    1497              :         int         i = 0;
    1498          140 : 
    1499              :         while (i < len)
    1500          808 :         {
    1501              :             int         thislen = PQmblen(txt + i, state->encoding);
    1502          668 : 
    1503              :             /* first byte should always be okay... */
    1504              :             newtxt[i] = txt[i];
    1505          668 :             i++;
    1506          668 :             while (--thislen > 0 && i < len)
    1507          808 :                 newtxt[i++] = (char) 0xFF;
    1508          140 :         }
    1509              :     }
    1510              : 
    1511              :     return yy_scan_buffer(newtxt, len + 2, state->scanner);
    1512       514668 : }
    1513              : 
    1514              : /*
    1515              :  * psqlscan_emit() --- body for ECHO macro
    1516              :  *
    1517              :  * NB: this must be used for ALL and ONLY the text copied from the flex
    1518              :  * input data.  If you pass it something that is not part of the yytext
    1519              :  * string, you are making a mistake.  Internally generated text can be
    1520              :  * appended directly to state->output_buf.
    1521              :  */
    1522              : void
    1523              : psqlscan_emit(PsqlScanState state, const char *txt, int len)
    1524      6504358 : {
    1525              :     PQExpBuffer output_buf = state->output_buf;
    1526      6504358 : 
    1527              :     if (state->safe_encoding)
    1528      6504358 :         appendBinaryPQExpBuffer(output_buf, txt, len);
    1529      6503882 :     else
    1530              :     {
    1531              :         /* Gotta do it the hard way */
    1532              :         const char *reference = state->refline;
    1533          476 :         int         i;
    1534              : 
    1535              :         reference += (txt - state->curline);
    1536          476 : 
    1537              :         for (i = 0; i < len; i++)
    1538         1277 :         {
    1539              :             char        ch = txt[i];
    1540          801 : 
    1541              :             if (ch == (char) 0xFF)
    1542          801 :                 ch = reference[i];
    1543          140 :             appendPQExpBufferChar(output_buf, ch);
    1544          801 :         }
    1545              :     }
    1546              : }
    1547      6504358 : 
    1548              : /*
    1549              :  * psqlscan_extract_substring --- fetch value of (part of) the current token
    1550              :  *
    1551              :  * This is like psqlscan_emit(), except that the data is returned as a
    1552              :  * malloc'd string rather than being pushed directly to state->output_buf.
    1553              :  */
    1554              : char *
    1555              : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
    1556         3315 : {
    1557              :     char       *result = pg_malloc_array(char, (len + 1));
    1558         3315 : 
    1559              :     if (state->safe_encoding)
    1560         3315 :         memcpy(result, txt, len);
    1561         3315 :     else
    1562              :     {
    1563              :         /* Gotta do it the hard way */
    1564              :         const char *reference = state->refline;
    1565            0 :         int         i;
    1566              : 
    1567              :         reference += (txt - state->curline);
    1568            0 : 
    1569              :         for (i = 0; i < len; i++)
    1570            0 :         {
    1571              :             char        ch = txt[i];
    1572            0 : 
    1573              :             if (ch == (char) 0xFF)
    1574            0 :                 ch = reference[i];
    1575            0 :             result[i] = ch;
    1576            0 :         }
    1577              :     }
    1578              :     result[len] = '\0';
    1579         3315 :     return result;
    1580         3315 : }
    1581              : 
    1582              : /*
    1583              :  * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
    1584              :  *
    1585              :  * If the variable name is found, escape its value using the appropriate
    1586              :  * quoting method and emit the value to output_buf.  (Since the result is
    1587              :  * surely quoted, there is never any reason to rescan it.)  If we don't
    1588              :  * find the variable or escaping fails, emit the token as-is.
    1589              :  */
    1590              : void
    1591              : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
    1592          677 :                          PsqlScanQuoteType quote)
    1593              : {
    1594              :     char       *varname;
    1595              :     char       *value;
    1596              : 
    1597              :     /* Variable lookup. */
    1598              :     varname = psqlscan_extract_substring(state, txt + 2, len - 3);
    1599          677 :     if (state->callbacks->get_variable)
    1600          677 :         value = state->callbacks->get_variable(varname, quote,
    1601          677 :                                                state->cb_passthrough);
    1602              :     else
    1603              :         value = NULL;
    1604            0 :     free(varname);
    1605          677 : 
    1606              :     if (value)
    1607          677 :     {
    1608              :         /* Emit the suitably-escaped value */
    1609              :         appendPQExpBufferStr(state->output_buf, value);
    1610          640 :         free(value);
    1611          640 :     }
    1612              :     else
    1613              :     {
    1614              :         /* Emit original token as-is */
    1615              :         psqlscan_emit(state, txt, len);
    1616           37 :     }
    1617              : }
    1618          677 : 
    1619              : void
    1620              : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
    1621           21 : {
    1622              :     char       *varname;
    1623              :     char       *value;
    1624              : 
    1625              :     varname = psqlscan_extract_substring(state, txt + 3, len - 4);
    1626           21 :     if (state->callbacks->get_variable)
    1627           21 :         value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
    1628           21 :                                                state->cb_passthrough);
    1629              :     else
    1630              :         value = NULL;
    1631            0 :     free(varname);
    1632           21 : 
    1633              :     if (value != NULL)
    1634           21 :     {
    1635              :         appendPQExpBufferStr(state->output_buf, "TRUE");
    1636            9 :         free(value);
    1637            9 :     }
    1638              :     else
    1639              :     {
    1640              :         appendPQExpBufferStr(state->output_buf, "FALSE");
    1641           12 :     }
    1642              : }
    1643           21 : /* END: function "psqlscan_test_variable" */
        

Generated by: LCOV version 2.0-1