LCOV - code coverage report
Current view: top level - src/fe_utils - psqlscan.l (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 82.3 % 571 470
Test Date: 2026-03-12 01:15:13 Functions: 100.0 % 19 19
Legend: Lines:     hit not hit

            Line data    Source code
       1              : %top{
       2              : /*-------------------------------------------------------------------------
       3              :  *
       4              :  * psqlscan.l
       5              :  *    lexical scanner for SQL commands
       6              :  *
       7              :  * This lexer used to be part of psql, and that heritage is reflected in
       8              :  * the file name as well as function and typedef names, though it can now
       9              :  * be used by other frontend programs as well.  It's also possible to extend
      10              :  * this lexer with a compatible add-on lexer to handle program-specific
      11              :  * backslash commands.
      12              :  *
      13              :  * This code is mainly concerned with determining where the end of a SQL
      14              :  * statement is: we are looking for semicolons that are not within quotes,
      15              :  * comments, or parentheses.  The most reliable way to handle this is to
      16              :  * borrow the backend's flex lexer rules, lock, stock, and barrel.  The rules
      17              :  * below are (except for a few) the same as the backend's, but their actions
      18              :  * are just ECHO whereas the backend's actions generally do other things.
      19              :  *
      20              :  * XXX The rules in this file must be kept in sync with the backend lexer!!!
      21              :  *
      22              :  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
      23              :  *
      24              :  * See psqlscan_int.h for additional commentary.
      25              :  *
      26              :  *
      27              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      28              :  * Portions Copyright (c) 1994, Regents of the University of California
      29              :  *
      30              :  * IDENTIFICATION
      31              :  *    src/fe_utils/psqlscan.l
      32              :  *
      33              :  *-------------------------------------------------------------------------
      34              :  */
      35              : #include "postgres_fe.h"
      36              : 
      37              : #include "common/logging.h"
      38              : #include "fe_utils/psqlscan.h"
      39              : 
      40              : #include "libpq-fe.h"
      41              : }
      42              : 
      43              : %{
      44              : 
      45              : /* LCOV_EXCL_START */
      46              : 
      47              : #include "fe_utils/psqlscan_int.h"
      48              : 
      49              : /*
      50              :  * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
      51              :  * doesn't presently make use of that argument, so just declare it as int.
      52              :  */
      53              : typedef int YYSTYPE;
      54              : 
      55              : 
      56              : /* Return values from yylex() */
      57              : #define LEXRES_EOL          0   /* end of input */
      58              : #define LEXRES_SEMI         1   /* command-terminating semicolon found */
      59              : #define LEXRES_BACKSLASH    2   /* backslash command start */
      60              : 
      61              : /* Every rule's ECHO hands the matched text and the scan state to psqlscan_emit() */
      62              : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
      63              : 
      64              : %}
      65              : 
      66              : %option reentrant
      67              : %option bison-bridge
      68              : %option 8bit
      69              : %option never-interactive
      70              : %option nodefault
      71              : %option noinput
      72              : %option nounput
      73              : %option noyywrap
      74              : %option warn
      75              : %option prefix="psql_yy"
      76              : 
      77              : /*
      78              :  * Set the type of yyextra; we use it as a pointer back to the containing
      79              :  * PsqlScanState.
      80              :  */
      81              : %option extra-type="PsqlScanState"
      82              : 
      83              : /*
      84              :  * All of the following definitions and rules should exactly match
      85              :  * src/backend/parser/scan.l so far as the flex patterns are concerned.
      86              :  * The rule bodies are just ECHO as opposed to what the backend does,
      87              :  * however.  (But be sure to duplicate code that affects the lexing process,
      88              :  * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
      89              :  * whereas scan.l has a separate one for each exclusive state.
      90              :  */
      91              : 
      92              : /*
      93              :  * OK, here is a short description of lex/flex rules behavior.
      94              :  * The longest pattern which matches an input string is always chosen.
      95              :  * For equal-length patterns, the first occurring in the rules list is chosen.
      96              :  * INITIAL is the starting state, to which all non-conditional rules apply.
      97              :  * Exclusive states change parsing rules while the state is active.  When in
      98              :  * an exclusive state, only those rules defined for that state apply.
      99              :  *
     100              :  * We use exclusive states for quoted strings, extended comments,
     101              :  * and to eliminate parsing troubles for numeric strings.
     102              :  * Exclusive states:
     103              :  *  <xb> bit string literal
     104              :  *  <xc> extended C-style comments
     105              :  *  <xd> delimited identifiers (double-quoted identifiers)
     106              :  *  <xh> hexadecimal byte string
     107              :  *  <xq> standard quoted strings
     108              :  *  <xqs> quote stop (detect continued strings)
     109              :  *  <xe> extended quoted strings (support backslash escape sequences)
     110              :  *  <xdolq> $foo$ quoted strings
     111              :  *  <xui> quoted identifier with Unicode escapes
     112              :  *  <xus> quoted string with Unicode escapes
     113              :  *
     114              :  * Note: we intentionally don't mimic the backend's <xeu> state; we have
     115              :  * no need to distinguish it from <xe> state, and no good way to get out
     116              :  * of it in error cases.  The backend just throws yyerror() in those
     117              :  * cases, but that's not an option here.
     118              :  */
     119              : 
     120              : %x xb
     121              : %x xc
     122              : %x xd
     123              : %x xh
     124              : %x xq
     125              : %x xqs
     126              : %x xe
     127              : %x xdolq
     128              : %x xui
     129              : %x xus
     130              : 
     131              : /*
     132              :  * In order to make the world safe for Windows and Mac clients as well as
     133              :  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
     134              :  * sequence will be seen as two successive newlines, but that doesn't cause
     135              :  * any problems.  Comments that start with -- and extend to the next
     136              :  * newline are treated as equivalent to a single whitespace character.
     137              :  *
     138              :  * NOTE a fine point: if there is no newline following --, we will absorb
     139              :  * everything to the end of the input as a comment.  This is correct.  Older
     140              :  * versions of Postgres failed to recognize -- as a comment if the input
     141              :  * did not end with a newline.
     142              :  *
     143              :  * non_newline_space tracks all space characters except newlines.
     144              :  *
     145              :  * XXX if you change the set of whitespace characters, fix scanner_isspace()
     146              :  * to agree.
     147              :  */
     148              : 
     149              : space               [ \t\n\r\f\v]
     150              : non_newline_space   [ \t\f\v]
     151              : newline             [\n\r]
     152              : non_newline         [^\n\r]
     153              : 
     154              : comment         ("--"{non_newline}*)
     155              : 
     156              : whitespace      ({space}+|{comment})
     157              : 
     158              : /*
     159              :  * SQL requires at least one newline in the whitespace separating
     160              :  * string literals that are to be concatenated.  Silly, but who are we
     161              :  * to argue?  Note that {whitespace_with_newline} should not have * after
     162              :  * it, whereas {whitespace} should generally have a * after it...
     163              :  */
     164              : 
     165              : special_whitespace      ({space}+|{comment}{newline})
     166              : non_newline_whitespace  ({non_newline_space}|{comment})
     167              : whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
     168              : 
     169              : quote           '
     170              : /* If we see {quote} then {quotecontinue}, the quoted string continues */
     171              : quotecontinue   {whitespace_with_newline}{quote}
     172              : 
     173              : /*
     174              :  * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
     175              :  * {quotecontinue}.  It might seem that this could just be {whitespace}*,
     176              :  * but if there's a dash after {whitespace_with_newline}, it must be consumed
     177              :  * to see if there's another dash --- which would start a {comment} and thus
     178              :  * allow continuation of the {quotecontinue} token.
     179              :  */
     180              : quotecontinuefail   {whitespace}*"-"?
     181              : 
     182              : /* Bit string
     183              :  * It is tempting to scan the string for only those characters
     184              :  * which are allowed. However, this leads to silently swallowed
     185              :  * characters if illegal characters are included in the string.
     186              :  * For example, if xbinside is [01] then B'ABCD' is interpreted
     187              :  * as a zero-length string, and the ABCD' is lost!
     188              :  * Better to pass the string forward and let the input routines
     189              :  * validate the contents.
     190              :  */
     191              : xbstart         [bB]{quote}
     192              : xbinside        [^']*
     193              : 
     194              : /* Hexadecimal byte string */
     195              : xhstart         [xX]{quote}
     196              : xhinside        [^']*
     197              : 
     198              : /* National character */
     199              : xnstart         [nN]{quote}
     200              : 
     201              : /* Quoted string that allows backslash escapes */
     202              : xestart         [eE]{quote}
     203              : xeinside        [^\\']+
     204              : xeescape        [\\][^0-7]
     205              : xeoctesc        [\\][0-7]{1,3}
     206              : xehexesc        [\\]x[0-9A-Fa-f]{1,2}
     207              : xeunicode       [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
     208              : xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
     209              : 
     210              : /* Extended quote
     211              :  * xqdouble implements embedded quote, ''''
     212              :  */
     213              : xqstart         {quote}
     214              : xqdouble        {quote}{quote}
     215              : xqinside        [^']+
     216              : 
     217              : /* $foo$ style quotes ("dollar quoting")
     218              :  * The quoted string starts with $foo$ where "foo" is an optional string
     219              :  * in the form of an identifier, except that it may not contain "$",
     220              :  * and extends to the first occurrence of an identical string.
     221              :  * There is *no* processing of the quoted text.
     222              :  *
     223              :  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
     224              :  * fails to match its trailing "$".
     225              :  */
     226              : dolq_start      [A-Za-z\200-\377_]
     227              : dolq_cont       [A-Za-z\200-\377_0-9]
     228              : dolqdelim       \$({dolq_start}{dolq_cont}*)?\$
     229              : dolqfailed      \${dolq_start}{dolq_cont}*
     230              : dolqinside      [^$]+
     231              : 
     232              : /* Double quote
     233              :  * Allows embedded spaces and other special characters in identifiers.
     234              :  */
     235              : dquote          \"
     236              : xdstart         {dquote}
     237              : xdstop          {dquote}
     238              : xddouble        {dquote}{dquote}
     239              : xdinside        [^"]+
     240              : 
     241              : /* Quoted identifier with Unicode escapes */
     242              : xuistart        [uU]&{dquote}
     243              : 
     244              : /* Quoted string with Unicode escapes */
     245              : xusstart        [uU]&{quote}
     246              : 
     247              : /* error rule to avoid backup */
     248              : xufailed        [uU]&
     249              : 
     250              : 
     251              : /* C-style comments
     252              :  *
     253              :  * The "extended comment" syntax closely resembles allowable operator syntax.
     254              :  * The tricky part here is to get lex to recognize a string starting with
     255              :  * slash-star as a comment, when interpreting it as an operator would produce
     256              :  * a longer match --- remember lex will prefer a longer match!  Also, if we
     257              :  * have something like plus-slash-star, lex will think this is a 3-character
     258              :  * operator whereas we want to see it as a + operator and a comment start.
     259              :  * The solution is two-fold:
     260              :  * 1. append {op_chars}* to xcstart so that it matches as much text as
     261              :  *    {operator} would. Then the tie-breaker (first matching rule of same
     262              :  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
     263              :  *    in case it contains a star-slash that should terminate the comment.
     264              :  * 2. In the operator rule, check for slash-star within the operator, and
     265              :  *    if found throw it back with yyless().  This handles the plus-slash-star
     266              :  *    problem.
     267              :  * Dash-dash comments have similar interactions with the operator rule.
     268              :  */
     269              : xcstart         \/\*{op_chars}*
     270              : xcstop          \*+\/
     271              : xcinside        [^*/]+
     272              : 
     273              : ident_start     [A-Za-z\200-\377_]
     274              : ident_cont      [A-Za-z\200-\377_0-9\$]
     275              : 
     276              : identifier      {ident_start}{ident_cont}*
     277              : 
     278              : /* Assorted special-case operators and operator-like tokens */
     279              : typecast        "::"
     280              : dot_dot         \.\.
     281              : colon_equals    ":="
     282              : 
     283              : /*
     284              :  * These operator-like tokens (unlike the above ones) also match the {operator}
     285              :  * rule, which means that they might be overridden by a longer match if they
     286              :  * are followed by a comment start or a + or - character. Accordingly, if you
     287              :  * add to this list, you must also add corresponding code to the backend's
     288              :  * {operator} block to return the correct token in such cases. (This is not
     289              :  * needed here in psqlscan.l, since the token value is ignored.)
     290              :  */
     291              : equals_greater  "=>"
     292              : less_equals     "<="
     293              : greater_equals  ">="
     294              : less_greater    "<>"
     295              : not_equals      "!="
     296              : 
     297              : /*
     298              :  * "self" is the set of chars that should be returned as single-character
     299              :  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
     300              :  * which can be one or more characters long (but if a single-char token
     301              :  * appears in the "self" set, it is not to be returned as an Op).  Note
     302              :  * that the sets overlap, but each has some chars that are not in the other.
     303              :  *
     304              :  * If you change either set, adjust the character lists appearing in the
     305              :  * rule for "operator"!
     306              :  */
     307              : self            [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
     308              : op_chars        [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
     309              : operator        {op_chars}+
     310              : 
     311              : /*
     312              :  * Numbers
     313              :  *
     314              :  * Unary minus is not part of a number here.  Instead we pass it separately to
     315              :  * the parser, and there it gets coerced via doNegate().
     316              :  *
     317              :  * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
     318              :  *
     319              :  * {realfail} is added to prevent the need for scanner
     320              :  * backup when the {real} rule fails to match completely.
     321              :  */
     322              : decdigit        [0-9]
     323              : hexdigit        [0-9A-Fa-f]
     324              : octdigit        [0-7]
     325              : bindigit        [0-1]
     326              : 
     327              : decinteger      {decdigit}(_?{decdigit})*
     328              : hexinteger      0[xX](_?{hexdigit})+
     329              : octinteger      0[oO](_?{octdigit})+
     330              : bininteger      0[bB](_?{bindigit})+
     331              : 
     332              : hexfail         0[xX]_?
     333              : octfail         0[oO]_?
     334              : binfail         0[bB]_?
     335              : 
     336              : numeric         (({decinteger}\.{decinteger}?)|(\.{decinteger}))
     337              : numericfail     {decinteger}\.\.
     338              : 
     339              : real            ({decinteger}|{numeric})[Ee][-+]?{decinteger}
     340              : realfail        ({decinteger}|{numeric})[Ee][-+]
     341              : 
     342              : /* Positional parameters don't accept underscores. */
     343              : param           \${decdigit}+
     344              : 
     345              : /*
     346              :  * An identifier immediately following an integer literal is disallowed because
     347              :  * in some cases it's ambiguous what is meant: for example, 0x1234 could be
     348              :  * either a hexinteger or a decinteger "0" and an identifier "x1234".  We can
     349              :  * detect such problems by seeing if integer_junk matches a longer substring
     350              :  * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
     351              :  * bininteger).  One "junk" pattern is sufficient because
     352              :  * {decinteger}{identifier} will match all the same strings we'd match with
     353              :  * {hexinteger}{identifier} etc.
     354              :  *
     355              :  * Note that the rule for integer_junk must appear after the ones for
     356              :  * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
     357              :  * and integer_junk, and we need hexinteger to be chosen in that case.
     358              :  *
     359              :  * Also disallow strings matched by numeric_junk, real_junk and param_junk
     360              :  * for consistency.
     361              :  */
     362              : integer_junk    {decinteger}{identifier}
     363              : numeric_junk    {numeric}{identifier}
     364              : real_junk       {real}{identifier}
     365              : param_junk      \${decdigit}+{identifier}
     366              : 
     367              : /* psql-specific: characters allowed in variable names */
     368              : variable_char   [A-Za-z\200-\377_0-9]
     369              : 
     370              : other           .
     371              : 
     372              : /*
     373              :  * Dollar quoted strings are totally opaque, and no escaping is done on them.
     374              :  * Other quoted strings must allow some special characters such as single-quote
     375              :  *  and newline.
     376              :  * Embedded single-quotes are implemented both in the SQL standard
     377              :  *  style of two adjacent single quotes "''" and in the Postgres/Java style
     378              :  *  of escaped-quote "\'".
     379              :  * Other embedded escaped characters are matched explicitly and the leading
     380              :  *  backslash is dropped from the string.
     381              :  * Note that xcstart must appear before operator, as explained above!
     382              :  *  Also whitespace (comment) must appear before operator.
     383              :  */
     384              : 
     385              : %%
     386              : 
     387              : %{
     388              :         /* Declare some local variables inside yylex(), for convenience */
     389              :         PsqlScanState cur_state = yyextra;  /* also referenced by the ECHO macro */
     390       610442 :         PQExpBuffer output_buf = cur_state->output_buf;  /* tested by the {whitespace} rule */
     391       610442 : 
     392              :         /*
     393              :          * Force flex into the state indicated by start_state.  This has a
     394              :          * couple of purposes: it lets some of the functions below set a new
     395              :          * starting state without ugly direct access to flex variables, and it
     396              :          * allows us to transition from one flex lexer to another so that we
     397              :          * can lex different parts of the source string using separate lexers.
     398              :          */
     399              :         BEGIN(cur_state->start_state);
     400       610442 : %}
     401              : 
     402              : {whitespace}    {
     403              :                     /*
     404              :                      * Note that the whitespace rule includes both true
     405              :                      * whitespace and single-line ("--" style) comments.
     406              :                      * We suppress whitespace until we have collected some
     407              :                      * non-whitespace data.  (This interacts with some
     408              :                      * decisions in MainLoop(); see there for details.)
     409              :                      */
     410              :                     if (output_buf->len > 0)  /* output buffer is non-empty, so keep it */
     411      1412344 :                         ECHO;
     412      1328122 :                 }
     413              : 
     414      1412344 : {xcstart}       {
     415          345 :                     cur_state->xcdepth = 0;  /* outermost comment: no nested comment open yet */
     416          345 :                     BEGIN(xc);  /* switch to the extended-comment state */
     417          345 :                     /* Put back any characters past slash-star; see above */
     418              :                     yyless(2);
     419          345 :                     ECHO;  /* after yyless(2), this emits only the slash-star */
     420          345 :                 }
     421              : 
     422          345 : <xc>{
     423              : {xcstart}       {
     424            9 :                     cur_state->xcdepth++;  /* nested comment start: one more level to close */
     425            9 :                     /* Put back any characters past slash-star; see above */
     426              :                     yyless(2);
     427            9 :                     ECHO;
     428            9 :                 }
     429              : 
     430            9 : {xcstop}        {
     431          354 :                     if (cur_state->xcdepth <= 0)
     432          354 :                         BEGIN(INITIAL);  /* closed the outermost comment */
     433          345 :                     else
     434              :                         cur_state->xcdepth--;  /* closed a nested comment; stay in xc */
     435            9 :                     ECHO;
     436          354 :                 }
     437              : 
     438          354 : {xcinside}      {
     439          802 :                     ECHO;  /* run of characters other than star and slash */
     440          802 :                 }
     441              : 
     442          802 : {op_chars}      {
     443          217 :                     ECHO;  /* single operator character not claimed by a longer match above */
     444          217 :                 }
     445              : 
     446          217 : \*+             {
     447            0 :                     ECHO;  /* run of stars with no slash after it; a lone star goes to {op_chars} */
     448            0 :                 }
     449              : } /* <xc> */
     450            0 : 
     451              : {xbstart}       {
     452          375 :                     BEGIN(xb);  /* enter the bit-string literal state */
     453          375 :                     ECHO;
     454          375 :                 }
     455              : <xh>{xhinside}    |
     456          375 : <xb>{xbinside}    {
     457         2025 :                     ECHO;  /* shared by xh and xb: contents are echoed without validation */
     458         2025 :                 }
     459              : 
     460         2025 : {xhstart}       {
     461         1665 :                     /* Hexadecimal bit type.
     462              :                      * At some point we should simply pass the string forward to the parser
     463              :                      * and label it there.  In the meantime, place a leading "x" on the string
     464              :                      * to mark it for the input routine as a hex string.
     465              :                      * NOTE(review): the action here only enters <xh> and echoes the match.
     466              :                      */
     467              :                     BEGIN(xh);
     468         1665 :                     ECHO;
     469         1665 :                 }
     470              : 
     471         1665 : {xnstart}       {
     472            0 :                     yyless(1);  /* eat only 'n' (or 'N') this time */
     473            0 :                     ECHO;  /* the put-back quote is rescanned on the next match */
     474            0 :                 }
     475              : 
     476            0 : {xqstart}       {
     477       119710 :                     if (cur_state->std_strings)
     478       119710 :                         BEGIN(xq);  /* std_strings set: lex as a standard quoted string */
     479       119710 :                     else
     480              :                         BEGIN(xe);  /* otherwise lex as an extended (backslash-escape) string */
     481            0 :                     ECHO;
     482       119710 :                 }
     483              : {xestart}       {
     484       119710 :                     BEGIN(xe);  /* E-prefixed literal: always the extended string state */
     485          669 :                     ECHO;
     486          669 :                 }
     487              : {xusstart}      {
     488          669 :                     BEGIN(xus);  /* U&-prefixed literal: string with Unicode escapes */
     489          277 :                     ECHO;
     490          277 :                 }
     491              : 
     492          277 : <xb,xh,xq,xe,xus>{quote} {
     493       122696 :                     /*
     494              :                      * When we are scanning a quoted string and see an end
     495              :                      * quote, we must look ahead for a possible continuation.
     496              :                      * If we don't see one, we know the end quote was in fact
     497              :                      * the end of the string.  To reduce the lexer table size,
     498              :                      * we use a single "xqs" state to do the lookahead for all
     499              :                      * types of strings.
     500              :                      */
     501              :                     cur_state->state_before_str_stop = YYSTATE;  /* lets a continuation resume this state */
     502       122696 :                     BEGIN(xqs);
     503       122696 :                     ECHO;  /* the quote itself is emitted either way */
     504       122696 :                 }
     505              : <xqs>{quotecontinue} {
     506       122696 :                     /*
     507            0 :                      * Found a quote continuation, so return to the in-quote
     508              :                      * state and continue scanning the literal.  Nothing is
     509              :                      * added to the literal's contents.
     510              :                      */
     511              :                     BEGIN(cur_state->state_before_str_stop);
     512            0 :                     ECHO;  /* the whitespace and reopening quote are still echoed as matched */
     513            0 :                 }
     514              : <xqs>{quotecontinuefail} |
     515            0 : <xqs>{other}  {
     516       121908 :                     /*
     517              :                      * Failed to see a quote continuation, so the literal is
     518              :                      * complete.  Throw back everything after the end quote
     519              :                      * and resume scanning it in INITIAL state.
     520              :                      */
     521              :                     yyless(0);
     522       121908 :                     BEGIN(INITIAL);
     523       121908 :                     /* There's nothing to echo: yyless(0) put the whole match back */
     524              :                 }
     525              : 
     526       121908 : <xq,xe,xus>{xqdouble} {
     527         3158 :                     ECHO;  /* doubled quote: an embedded quote, not the end of the string */
     528         3158 :                 }
     529              : <xq,xus>{xqinside}  {
     530         3158 :                     ECHO;  /* run of non-quote characters */
     531       125617 :                 }
     532              : <xe>{xeinside}  {
     533       125617 :                     ECHO;  /* run of characters other than backslash and quote */
     534         1343 :                 }
     535              : <xe>{xeunicode} {
     536         1343 :                     ECHO;  /* backslash-u with 4 hex digits, or backslash-U with 8 */
     537          108 :                 }
     538              : <xe>{xeunicodefail}   {
     539          108 :                     ECHO;  /* Unicode escape with too few hex digits: echoed unchanged */
     540            6 :                 }
     541              : <xe>{xeescape}  {
     542            6 :                     ECHO;  /* backslash plus one non-octal-digit character, e.g. an escaped quote */
     543          746 :                 }
     544              : <xe>{xeoctesc}  {
     545          746 :                     ECHO;  /* backslash plus one to three octal digits */
     546           12 :                 }
     547              : <xe>{xehexesc}  {
     548           12 :                     ECHO;  /* backslash, x, then one or two hex digits */
     549            6 :                 }
     550              : <xe>.         {
     551            6 :                     /* This is only needed for \ just before EOF */
     552            0 :                     ECHO;
     553            0 :                 }
     554              : 
     555            0 : {dolqdelim}     {
     556         3634 :                     cur_state->dolqstart = pg_strdup(yytext);  /* keep a copy of the opening delimiter for the closing comparison */
     557         3634 :                     BEGIN(xdolq);
     558         3634 :                     ECHO;
     559         3634 :                 }
     560              : {dolqfailed}    {
     561         3634 :                     /* throw back all but the initial "$" */
     562            0 :                     yyless(1);
     563            0 :                     ECHO;
     564            0 :                 }
     565              : <xdolq>{dolqdelim} {
     566            0 :                     if (strcmp(yytext, cur_state->dolqstart) == 0)  /* exact match with the saved opening delimiter */
     567         3806 :                     {
     568              :                         free(cur_state->dolqstart);  /* the copy made by pg_strdup in the opening rule */
     569         3634 :                         cur_state->dolqstart = NULL;
     570         3634 :                         BEGIN(INITIAL);
     571         3634 :                     }
     572              :                     else
     573              :                     {
     574              :                         /*
     575              :                          * When we fail to match $...$ to dolqstart, transfer
     576              :                          * the $... part to the output, but put back the final
     577              :                          * $ for rescanning.  Consider $delim$...$junk$delim$
     578              :                          */
     579              :                         yyless(yyleng - 1);
     580          172 :                     }
     581              :                     ECHO;  /* emits the whole delimiter, or only the part kept by yyless() above */
     582         3806 :                 }
     583              : <xdolq>{dolqinside} {
     584         3806 :                     ECHO;
     585        19016 :                 }
     586              : <xdolq>{dolqfailed} {
     587        19016 :                     ECHO;
     588          559 :                 }
     589              : <xdolq>.      {
     590          559 :                     /* This is only needed for $ inside the quoted text */
     591         1222 :                     ECHO;
     592         1222 :                 }
     593              : 
     594         1222 : {xdstart}       {  /* enter the xd state; the <xd>{xdstop} rule below returns to INITIAL */
     595         5015 :                     BEGIN(xd);
     596         5015 :                     ECHO;
     597         5015 :                 }
     598              : {xuistart}      {  /* enter the xui state; the <xui>{dquote} rule below returns to INITIAL */
     599         5015 :                     BEGIN(xui);
     600           12 :                     ECHO;
     601           12 :                 }
     602              : <xd>{xdstop}  {
     603           12 :                     BEGIN(INITIAL);
     604         5015 :                     ECHO;
     605         5015 :                 }
     606              : <xui>{dquote} {
     607         5015 :                     BEGIN(INITIAL);
     608           12 :                     ECHO;
     609           12 :                 }
     610              : <xd,xui>{xddouble}    {  /* shared by both states: copied through, state unchanged */
     611           12 :                     ECHO;
     612           61 :                 }
     613              : <xd,xui>{xdinside}    {  /* shared by both states: copied through, state unchanged */
     614           61 :                     ECHO;
     615         5081 :                 }
     616              : 
     617         5081 : {xufailed}  {
     618            0 :                     /* throw back all but the initial u/U, then echo that one character */
     619              :                     yyless(1);
     620            0 :                     ECHO;
     621            0 :                 }
     622              : 
     623            0 : {typecast}      {  /* this rule and the seven after it only ECHO the matched token */
     624        27564 :                     ECHO;
     625        27564 :                 }
     626              : 
     627        27564 : {dot_dot}       {
     628            0 :                     ECHO;
     629            0 :                 }
     630              : 
     631            0 : {colon_equals}  {
     632         1574 :                     ECHO;
     633         1574 :                 }
     634              : 
     635         1574 : {equals_greater} {
     636          979 :                     ECHO;
     637          979 :                 }
     638              : 
     639          979 : {less_equals}   {
     640         1484 :                     ECHO;
     641         1484 :                 }
     642              : 
     643         1484 : {greater_equals} {
     644         3897 :                     ECHO;
     645         3897 :                 }
     646              : 
     647         3897 : {less_greater}  {
     648          640 :                     ECHO;
     649          640 :                 }
     650              : 
     651          640 : {not_equals}    {
     652         1162 :                     ECHO;
     653         1162 :                 }
     654              : 
     655         1162 :     /*
     656              :      * These rules are specific to psql --- they implement parenthesis
     657              :      * counting and detection of command-ending semicolon.  These must
     658              :      * appear before the {self} rule so that they take precedence over it.
     659              :      */
     660              : 
     661       191237 : "("               {
     662              :                     cur_state->paren_depth++;
     663       191237 :                     ECHO;
     664       191237 :                 }
     665              : 
     666       191237 : ")"               {
     667       191230 :                     if (cur_state->paren_depth > 0)  /* guard: an unmatched close paren must not drive the depth negative */
     668       191230 :                         cur_state->paren_depth--;
     669       191230 :                     ECHO;
     670       191230 :                 }
     671              : 
     672       191230 : ";"               {
     673       184214 :                     ECHO;  /* the semicolon is always copied to the output, terminator or not */
     674       184214 :                     if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)  /* outside parentheses and outside any tracked BEGIN block */
     675       184214 :                     {
     676              :                         /* Terminate lexing temporarily */
     677              :                         cur_state->start_state = YY_START;  /* save the current start condition in the state struct */
     678       184110 :                         cur_state->identifier_count = 0;  /* the next statement starts a fresh identifier history */
     679       184110 :                         return LEXRES_SEMI;
     680       184110 :                     }
     681              :                 }
     682              : 
     683          104 :     /*
     684              :      * psql-specific rules to handle backslash commands and variable
     685              :      * substitution.  We want these before {self}, also.
     686              :      */
     687              : 
     688          397 : "\\"[;:]      {
     689              :                     /* Force a semicolon or colon into the query buffer: only the character after the backslash is emitted */
     690              :                     psqlscan_emit(cur_state, yytext + 1, 1);
     691          397 :                     if (yytext[1] == ';')
     692          397 :                         cur_state->identifier_count = 0;  /* same reset the plain semicolon rule performs when it ends a command */
     693          397 :                 }
     694              : 
     695          397 : "\\"          {
     696        28941 :                     /* Terminate lexing temporarily; nothing is echoed for the backslash itself */
     697              :                     cur_state->start_state = YY_START;
     698        28941 :                     return LEXRES_BACKSLASH;
     699        28941 :                 }
     700              : 
     701              : :{variable_char}+   {
     702         1496 :                     /* Possible psql variable substitution */
     703              :                     char       *varname;
     704              :                     char       *value;
     705              : 
     706              :                     varname = psqlscan_extract_substring(cur_state,
     707         1496 :                                                          yytext + 1,  /* skip the leading colon */
     708         1496 :                                                          yyleng - 1);
     709         1496 :                     if (cur_state->callbacks->get_variable)  /* the callback is optional; without it nothing is substituted */
     710         1496 :                         value = cur_state->callbacks->get_variable(varname,
     711          900 :                                                                    PQUOTE_PLAIN,
     712              :                                                                    cur_state->cb_passthrough);
     713              :                     else
     714              :                         value = NULL;
     715          596 : 
     716              :                     if (value)
     717         1496 :                     {
     718              :                         /* It is a variable, check for recursion */
     719              :                         if (psqlscan_var_is_current_source(cur_state, varname))
     720          669 :                         {
     721              :                             /* Recursive expansion --- don't go there */
     722              :                             pg_log_warning("skipping recursive expansion of variable \"%s\"",
     723            0 :                                                               varname);
     724              :                             /* Instead copy the string as is */
     725              :                             ECHO;
     726            0 :                         }
     727              :                         else
     728              :                         {
     729              :                             /* OK, perform substitution */
     730              :                             psqlscan_push_new_buffer(cur_state, value, varname);
     731          669 :                             /* yy_scan_string already made buffer active */
     732              :                         }
     733              :                         free(value);  /* released on both paths, whether or not a new buffer was pushed */
     734          669 :                     }
     735              :                     else
     736              :                     {
     737              :                         /*
     738              :                          * if the variable doesn't exist we'll copy the string
     739              :                          * as is
     740              :                          */
     741              :                         ECHO;
     742          827 :                     }
     743              : 
     744              :                     free(varname);  /* the extracted name is always released before leaving the action */
     745         1496 :                 }
     746              : 
     747         1496 : :'{variable_char}+' {  /* no ECHO in these three rules: the whole match (yytext, yyleng) is handed to a psqlscan_ helper */
     748          467 :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     749          467 :                                              PQUOTE_SQL_LITERAL);
     750              :                 }
     751              : 
     752          467 : :\"{variable_char}+\" {
     753           17 :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     754           17 :                                              PQUOTE_SQL_IDENT);
     755              :                 }
     756              : 
     757           17 : :\{\?{variable_char}+\} {
     758            6 :                     psqlscan_test_variable(cur_state, yytext, yyleng);
     759            6 :                 }
     760              : 
     761            6 :     /*
     762              :      * These rules just avoid the need for scanner backup if one of the
     763              :      * three rules above fails to match completely.  Each keeps only the colon via yyless(1) and echoes it.
     764              :      */
     765              : 
     766            0 : :'{variable_char}*  {
     767              :                     /* Throw back everything but the colon */
     768              :                     yyless(1);
     769            0 :                     ECHO;
     770            0 :                 }
     771              : 
     772            0 : :\"{variable_char}*    {
     773            0 :                     /* Throw back everything but the colon */
     774              :                     yyless(1);
     775            0 :                     ECHO;
     776            0 :                 }
     777              : 
     778            0 : :\{\?{variable_char}*   {
     779            0 :                     /* Throw back everything but the colon */
     780              :                     yyless(1);
     781            0 :                     ECHO;
     782            0 :                 }
     783              : :\{ {
     784            0 :                     /* Throw back everything but the colon */
     785            0 :                     yyless(1);
     786            0 :                     ECHO;
     787            0 :                 }
     788              : 
     789            0 :     /*
     790              :      * Back to backend-compatible rules.
     791              :      */
     792              : 
     793       333060 : {self}          {  /* placed after the psql-specific rules above so that those take precedence */
     794              :                     ECHO;
     795       333060 :                 }
     796              : 
     797       333060 : {operator}      {
     798        10124 :                     /*
     799              :                      * Check for embedded slash-star or dash-dash; those
     800              :                      * are comment starts, so operator must stop there.
     801              :                      * Note that slash-star or dash-dash at the first
     802              :                      * character will match a prior rule, not this one.
     803              :                      */
     804              :                     int         nchars = yyleng;  /* how many leading characters are kept as the operator */
     805        10124 :                     char       *slashstar = strstr(yytext, "/*");
     806        10124 :                     char       *dashdash = strstr(yytext, "--");
     807        10124 : 
     808              :                     if (slashstar && dashdash)
     809        10124 :                     {
     810              :                         /* if both appear, take the first one */
     811              :                         if (slashstar > dashdash)
     812            0 :                             slashstar = dashdash;
     813            0 :                     }
     814              :                     else if (!slashstar)
     815        10124 :                         slashstar = dashdash;  /* reuse slashstar for whichever was found; may still be NULL */
     816        10094 :                     if (slashstar)
     817        10124 :                         nchars = slashstar - yytext;  /* cut the operator just before the comment start */
     818           36 : 
     819              :                     /*
     820              :                      * For SQL compatibility, '+' and '-' cannot be the
     821              :                      * last char of a multi-char operator unless the operator
     822              :                      * contains chars that are not in SQL operators.
     823              :                      * The idea is to lex '=-' as two operators, but not
     824              :                      * to forbid operator names like '?-' that could not be
     825              :                      * sequences of SQL operators.
     826              :                      */
     827              :                     if (nchars > 1 &&
     828        10124 :                         (yytext[nchars - 1] == '+' ||
     829         9242 :                          yytext[nchars - 1] == '-'))
     830         9239 :                     {
     831              :                         int         ic;
     832              : 
     833              :                         for (ic = nchars - 2; ic >= 0; ic--)  /* scan backwards from the char before the trailing sign */
     834          279 :                         {
     835              :                             char c = yytext[ic];
     836          240 :                             if (c == '~' || c == '!' || c == '@' ||
     837          240 :                                 c == '#' || c == '^' || c == '&' ||
     838          198 :                                 c == '|' || c == '`' || c == '?' ||
     839           75 :                                 c == '%')
     840              :                                 break;
     841              :                         }
     842              :                         if (ic < 0)  /* the loop ran off the front without hitting any listed character */
     843          213 :                         {
     844              :                             /*
     845              :                              * didn't find a qualifying character, so remove
     846              :                              * all trailing [+-]
     847              :                              */
     848              :                             do {
     849              :                                 nchars--;
     850           39 :                             } while (nchars > 1 &&  /* never shrink below one character */
     851           39 :                                  (yytext[nchars - 1] == '+' ||
     852           18 :                                   yytext[nchars - 1] == '-'));
     853           18 :                         }
     854              :                     }
     855              : 
     856              :                     if (nchars < yyleng)
     857        10124 :                     {
     858              :                         /* Strip the unwanted chars from the token */
     859              :                         yyless(nchars);
     860           75 :                     }
     861              :                     ECHO;  /* emits only what is left of the match after any yyless() above */
     862        10124 :                 }
     863              : 
     864        10124 : {param}         {  /* every rule in this group ECHOes its match; only {numericfail} trims the match first */
     865          607 :                     ECHO;
     866          607 :                 }
     867              : {param_junk}    {
     868          607 :                     ECHO;
     869            6 :                 }
     870              : 
     871            6 : {decinteger}    {
     872       106670 :                     ECHO;
     873       106670 :                 }
     874              : {hexinteger}    {
     875       106670 :                     ECHO;
     876           67 :                 }
     877              : {octinteger}    {
     878           67 :                     ECHO;
     879           30 :                 }
     880              : {bininteger}    {
     881           30 :                     ECHO;
     882           30 :                 }
     883              : {hexfail}       {
     884           30 :                     ECHO;
     885            3 :                 }
     886              : {octfail}       {
     887            3 :                     ECHO;
     888            3 :                 }
     889              : {binfail}       {
     890            3 :                     ECHO;
     891            3 :                 }
     892              : {numeric}       {
     893            3 :                     ECHO;
     894         4000 :                 }
     895              : {numericfail}   {
     896         4000 :                     /* throw back the .., and treat as integer */
     897            0 :                     yyless(yyleng - 2);  /* keep everything except the last two characters of the match */
     898            0 :                     ECHO;
     899            0 :                 }
     900              : {real}          {
     901            0 :                     ECHO;
     902          316 :                 }
     903              : {realfail}      {
     904          316 :                     ECHO;
     905            3 :                 }
     906              : {integer_junk}  {
     907            3 :                     ECHO;
     908           33 :                 }
     909              : {numeric_junk}  {
     910           33 :                     ECHO;
     911           24 :                 }
     912              : {real_junk}     {
     913           24 :                     ECHO;
     914            0 :                 }
     915              : 
     916            0 : 
     917      1370161 : {identifier}    {
     918              :                     /*
     919              :                      * We need to track if we are inside a BEGIN .. END block
     920              :                      * in a function definition, so that semicolons contained
     921              :                      * therein don't terminate the whole statement.  Short of
     922              :                      * writing a full parser here, the following heuristic
     923              :                      * should work.  First, we track whether the beginning of
     924              :                      * the statement matches CREATE [OR REPLACE]
     925              :                      * {FUNCTION|PROCEDURE}
     926              :                      */
     927              : 
     928              :                     if (cur_state->identifier_count == 0)  /* first identifier of a statement: clear the recorded history */
     929      1370161 :                         memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
     930       190646 : 
     931              :                     if (pg_strcasecmp(yytext, "create") == 0 ||
     932      2708162 :                         pg_strcasecmp(yytext, "function") == 0 ||
     933      2670531 :                         pg_strcasecmp(yytext, "procedure") == 0 ||
     934      2663307 :                         pg_strcasecmp(yytext, "or") == 0 ||
     935      2658212 :                         pg_strcasecmp(yytext, "replace") == 0)
     936      1327435 :                     {
     937              :                         if (cur_state->identifier_count < sizeof(cur_state->identifiers))  /* bounds check before the array store */
     938        43876 :                             cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);  /* store only the lowercased first letter, at this identifier's position */
     939        39541 :                     }
     940              : 
     941              :                     cur_state->identifier_count++;  /* counts every identifier, whether or not it was one of the five keywords */
     942      1370161 : 
     943              :                     if (cur_state->identifiers[0] == 'c' &&  /* letters c, then f or p, or c o r then f or p, and no open parentheses */
     944      1370161 :                         (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
     945       321874 :                          (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
     946       292595 :                           (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
     947        11436 :                         cur_state->paren_depth == 0)
     948        36477 :                     {
     949              :                         if (pg_strcasecmp(yytext, "begin") == 0)
     950        29201 :                             cur_state->begin_depth++;
     951           68 :                         else if (pg_strcasecmp(yytext, "case") == 0)
     952        29133 :                         {
     953              :                             /*
     954              :                              * CASE also ends with END.  We only need to track
     955              :                              * this if we are already inside a BEGIN.
     956              :                              */
     957              :                             if (cur_state->begin_depth >= 1)
     958            3 :                                 cur_state->begin_depth++;
     959            3 :                         }
     960              :                         else if (pg_strcasecmp(yytext, "end") == 0)
     961        29130 :                         {
     962              :                             if (cur_state->begin_depth > 0)  /* guard: a stray END must not drive the depth negative */
     963           71 :                                 cur_state->begin_depth--;
     964           71 :                         }
     965              :                     }
     966              : 
     967              :                     ECHO;  /* the identifier itself is always copied through unchanged */
     968      1370161 :                 }
     969              : 
     970      1370161 : {other}         {  /* no special handling here: the match is copied through */
     971            0 :                     ECHO;
     972            0 :                 }
     973              : 
     974            0 : <<EOF>>         {
     975       398060 :                     if (cur_state->buffer_stack == NULL)  /* no pushed buffers remain, so this is the end of the real input */
     976       398060 :                     {
     977              :                         cur_state->start_state = YY_START;  /* save the current start condition in the state struct */
     978       397391 :                         return LEXRES_EOL;      /* end of input reached */
     979       397391 :                     }
     980              : 
     981              :                     /*
     982              :                      * We were expanding a variable, so pop the inclusion
     983              :                      * stack and keep lexing (no return: the scanner continues)
     984              :                      */
     985              :                     psqlscan_pop_buffer_stack(cur_state);
     986          669 :                     psqlscan_select_top_buffer(cur_state);
     987          669 :                 }
     988              : 
     989          669 : %%
     990            0 : 
     991              : /* LCOV_EXCL_STOP */
     992              : 
     993              : /*
     994              :  * Create a lexer working state struct and return it to the caller.
     995              :  *
     996              :  * callbacks is a struct of function pointers that encapsulate some
     997              :  * behavior we need from the surrounding program.  Only the pointer is
     998              :  * stored, so this struct must remain valid for the lifespan of the PsqlScanState.
     999              :  */
    1000              : PsqlScanState
    1001              : psql_scan_create(const PsqlScanCallbacks *callbacks)
    1002        10023 : {
    1003              :     PsqlScanState state;
    1004              : 
    1005              :     state = pg_malloc0_object(PsqlScanStateData);
    1006        10023 : 
    1007              :     state->callbacks = callbacks;  /* pointer is kept, not copied */
    1008        10023 : 
    1009              :     yylex_init(&state->scanner);  /* each state gets its own flex scanner object */
    1010        10023 : 
    1011              :     yyset_extra(state, state->scanner);  /* attach the state to the scanner as its extra data */
    1012        10023 : 
    1013              :     psql_scan_reset(state);
    1014        10023 : 
    1015              :     return state;
    1016        10023 : }
    1017              : 
    1018              : /*
    1019              :  * Destroy a lexer working state struct, releasing all resources; state must not be used afterwards.
    1020              :  */
    1021              : void
    1022              : psql_scan_destroy(PsqlScanState state)
    1023         9967 : {
    1024              :     psql_scan_finish(state);
    1025         9967 : 
    1026              :     psql_scan_reset(state);
    1027         9967 : 
    1028              :     yylex_destroy(state->scanner);  /* counterpart of the yylex_init() call in psql_scan_create */
    1029         9967 : 
    1030              :     free(state);  /* the struct itself goes last, after everything that still reads it */
    1031         9967 : }
    1032         9967 : 
    1033              : /*
    1034              :  * Set the callback passthrough pointer for the lexer.
    1035              :  *
    1036              :  * This could have been integrated into psql_scan_create, but keeping it
    1037              :  * separate allows the application to change the pointer later, which might
    1038              :  * be useful.  The pointer is only stored here; it is not dereferenced.
    1039              :  */
    1040              : void
    1041              : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
    1042         9420 : {
    1043              :     state->cb_passthrough = passthrough;  /* later handed to the get_variable callback by the lexer rules */
    1044         9420 : }
    1045         9420 : 
    1046              : /*
    1047              :  * Set up to perform lexing of the given input line.
    1048              :  *
    1049              :  * The text at *line, extending for line_len bytes, will be scanned by
    1050              :  * subsequent calls to the psql_scan routines.  psql_scan_finish should
    1051              :  * be called when scanning is complete.  Note that the lexer retains
    1052              :  * a pointer to the storage at *line --- this string must not be altered
    1053              :  * or freed until after psql_scan_finish is called.
    1054              :  *
    1055              :  * encoding is the libpq identifier for the character encoding in use,
    1056              :  * and std_strings says whether standard_conforming_strings is on.
    1057              :  */
    1058              : void
    1059              : psql_scan_setup(PsqlScanState state,
    1060       397686 :                 const char *line, int line_len,
    1061              :                 int encoding, bool std_strings)
    1062              : {
    1063              :     /* Mustn't be scanning already: no scan buffer and no pushed buffers may exist */
    1064              :     Assert(state->scanbufhandle == NULL);
    1065              :     Assert(state->buffer_stack == NULL);
    1066              : 
    1067              :     /* Record the encoding, and whether it is a valid server encoding */
    1068              :     state->encoding = encoding;
    1069       397686 :     state->safe_encoding = pg_valid_server_encoding_id(encoding);
    1070       397686 : 
    1071              :     /* Save standard-strings flag as well */
    1072              :     state->std_strings = std_strings;
    1073       397686 : 
    1074              :     /* Set up flex input buffer with appropriate translation and padding */
    1075              :     state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
    1076       397686 :                                                    &state->scanbuf);
    1077              :     state->scanline = line;  /* the caller's original text; this is the pointer that must stay valid */
    1078       397686 : 
    1079              :     /* Set lookaside data in case we have to map unsafe encoding */
    1080              :     state->curline = state->scanbuf;
    1081       397686 :     state->refline = state->scanline;
    1082       397686 : 
    1083              :     /* Initialize state for psql_scan_get_location() */
    1084              :     state->cur_line_no = 0;      /* yylex not called yet */
    1085       397686 :     state->cur_line_ptr = state->scanbuf;
    1086       397686 : }
    1087       397686 : 
    1088              : /*
    1089              :  * Do lexical analysis of SQL command text.
    1090              :  *
    1091              :  * The text previously passed to psql_scan_setup is scanned, and appended
    1092              :  * (possibly with transformation) to query_buf.
    1093              :  *
    1094              :  * The return value indicates the condition that stopped scanning:
    1095              :  *
    1096              :  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
    1097              :  * transferred to query_buf.)  The command accumulated in query_buf should
    1098              :  * be executed, then clear query_buf and call again to scan the remainder
    1099              :  * of the line.
    1100              :  *
    1101              :  * PSCAN_BACKSLASH: found a backslash that starts a special command.
    1102              :  * Any previous data on the line has been transferred to query_buf.
    1103              :  * The caller will typically next apply a separate flex lexer to scan
    1104              :  * the special command.
    1105              :  *
    1106              :  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
    1107              :  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
    1108              :  *
    1109              :  * PSCAN_EOL: the end of the line was reached, and there is no lexical
    1110              :  * reason to consider the command incomplete.  The caller may or may not
    1111              :  * choose to send it.  *prompt is set to the appropriate prompt type if
    1112              :  * the caller chooses to collect more input.
    1113              :  *
    1114              :  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
    1115              :  * be called next, then the cycle may be repeated with a fresh input line.
    1116              :  *
    1117              :  * In all cases, *prompt is set to an appropriate prompt type code for the
    1118              :  * next line-input operation.
    1119              :  */
    1120              : PsqlScanResult
    1121              : psql_scan(PsqlScanState state,
    1122       610442 :           PQExpBuffer query_buf,
    1123              :           promptStatus_t *prompt)
    1124              : {
    1125              :     PsqlScanResult result;
    1126              :     int         lexresult;
    1127              : 
    1128              :     /* Must be scanning already */
    1129              :     Assert(state->scanbufhandle != NULL);
    1130              : 
    1131              :     /* Set current output target */
    1132              :     state->output_buf = query_buf;
    1133       610442 : 
    1134              :     /* Set input source */
    1135              :     if (state->buffer_stack != NULL)
    1136       610442 :         yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
    1137           45 :     else
    1138              :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1139       610397 : 
    1140              :     /* And lex. */
    1141              :     lexresult = yylex(NULL, state->scanner);
    1142       610442 : 
    1143              :     /* Notify psql_scan_get_location() that a yylex call has been made. */
    1144              :     if (state->cur_line_no == 0)
    1145       610442 :         state->cur_line_no = 1;
    1146       397684 : 
    1147              :     /*
    1148              :      * Check termination state and return appropriate result info.
    1149              :      */
    1150              :     switch (lexresult)
    1151       610442 :     {
    1152              :         case LEXRES_EOL:        /* end of input */
    1153       397391 :             switch (state->start_state)
    1154       397391 :             {
    1155              :                 case INITIAL:
    1156       372428 :                 case xqs:       /* we treat this like INITIAL */
    1157              :                     if (state->paren_depth > 0)
    1158       372428 :                     {
    1159              :                         result = PSCAN_INCOMPLETE;
    1160        32051 :                         *prompt = PROMPT_PAREN;
    1161        32051 :                     }
    1162              :                     else if (state->begin_depth > 0)
    1163       340377 :                     {
    1164              :                         result = PSCAN_INCOMPLETE;
    1165          427 :                         *prompt = PROMPT_CONTINUE;
    1166          427 :                     }
    1167              :                     else if (query_buf->len > 0)
    1168       339950 :                     {
    1169              :                         result = PSCAN_EOL;
    1170        73351 :                         *prompt = PROMPT_CONTINUE;
    1171        73351 :                     }
    1172              :                     else
    1173              :                     {
    1174              :                         /* never bother to send an empty buffer */
    1175              :                         result = PSCAN_INCOMPLETE;
    1176       266599 :                         *prompt = PROMPT_READY;
    1177       266599 :                     }
    1178              :                     break;
    1179       372428 :                 case xb:
    1180            0 :                     result = PSCAN_INCOMPLETE;
    1181            0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1182            0 :                     break;
    1183            0 :                 case xc:
    1184          401 :                     result = PSCAN_INCOMPLETE;
    1185          401 :                     *prompt = PROMPT_COMMENT;
    1186          401 :                     break;
    1187          401 :                 case xd:
    1188           19 :                     result = PSCAN_INCOMPLETE;
    1189           19 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1190           19 :                     break;
    1191           19 :                 case xh:
    1192            0 :                     result = PSCAN_INCOMPLETE;
    1193            0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1194            0 :                     break;
    1195            0 :                 case xe:
    1196          301 :                     result = PSCAN_INCOMPLETE;
    1197          301 :                     *prompt = PROMPT_SINGLEQUOTE;
    1198          301 :                     break;
    1199          301 :                 case xq:
    1200         5574 :                     result = PSCAN_INCOMPLETE;
    1201         5574 :                     *prompt = PROMPT_SINGLEQUOTE;
    1202         5574 :                     break;
    1203         5574 :                 case xdolq:
    1204        18668 :                     result = PSCAN_INCOMPLETE;
    1205        18668 :                     *prompt = PROMPT_DOLLARQUOTE;
    1206        18668 :                     break;
    1207        18668 :                 case xui:
    1208            0 :                     result = PSCAN_INCOMPLETE;
    1209            0 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1210            0 :                     break;
    1211            0 :                 case xus:
    1212            0 :                     result = PSCAN_INCOMPLETE;
    1213            0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1214            0 :                     break;
    1215            0 :                 default:
    1216            0 :                     /* can't get here */
    1217              :                     fprintf(stderr, "invalid YY_START\n");
    1218            0 :                     exit(1);
    1219            0 :             }
    1220              :             break;
    1221       397391 :         case LEXRES_SEMI:       /* semicolon */
    1222       184110 :             result = PSCAN_SEMICOLON;
    1223       184110 :             *prompt = PROMPT_READY;
    1224       184110 :             break;
    1225       184110 :         case LEXRES_BACKSLASH:  /* backslash */
    1226        28941 :             result = PSCAN_BACKSLASH;
    1227        28941 :             *prompt = PROMPT_READY;
    1228        28941 :             break;
    1229        28941 :         default:
    1230            0 :             /* can't get here */
    1231              :             fprintf(stderr, "invalid yylex result\n");
    1232            0 :             exit(1);
    1233            0 :     }
    1234              : 
    1235              :     return result;
    1236       610442 : }
    1237              : 
    1238              : /*
    1239              :  * Clean up after scanning a string.  This flushes any unread input and
    1240              :  * releases resources (but not the PsqlScanState itself).  Note however
    1241              :  * that this does not reset the lexer scan state; that can be done by
    1242              :  * psql_scan_reset(), which is an orthogonal operation.
    1243              :  *
    1244              :  * It is legal to call this when not scanning anything (makes it easier
    1245              :  * to deal with error recovery).
    1246              :  */
    1247              : void
    1248              : psql_scan_finish(PsqlScanState state)
    1249       407278 : {
    1250              :     /* Drop any incomplete variable expansions. */
    1251              :     while (state->buffer_stack != NULL)
    1252       407278 :         psqlscan_pop_buffer_stack(state);
    1253            0 : 
    1254              :     /* Done with the outer scan buffer, too */
    1255              :     if (state->scanbufhandle)
    1256       407278 :         yy_delete_buffer(state->scanbufhandle, state->scanner);
    1257       397631 :     state->scanbufhandle = NULL;
    1258       407278 :     if (state->scanbuf)
    1259       407278 :         free(state->scanbuf);
    1260       397631 :     state->scanbuf = NULL;
    1261       407278 : }
    1262       407278 : 
    1263              : /*
    1264              :  * Reset lexer scanning state to start conditions.  This is appropriate
    1265              :  * for executing \r psql commands (or any other time that we discard the
    1266              :  * prior contents of query_buf).  It is not, however, necessary to do this
    1267              :  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
    1268              :  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
    1269              :  * conditions are returned.
    1270              :  *
    1271              :  * Note that this is unrelated to flushing unread input; that task is
    1272              :  * done by psql_scan_finish().
    1273              :  */
    1274              : void
    1275              : psql_scan_reset(PsqlScanState state)
    1276        21592 : {
    1277              :     state->start_state = INITIAL;
    1278        21592 :     state->paren_depth = 0;
    1279        21592 :     state->xcdepth = 0;          /* not really necessary */
    1280        21592 :     if (state->dolqstart)
    1281        21592 :         free(state->dolqstart);
    1282            0 :     state->dolqstart = NULL;
    1283        21592 :     state->identifier_count = 0;
    1284        21592 :     state->begin_depth = 0;
    1285        21592 : }
    1286        21592 : 
    1287              : /*
    1288              :  * Reselect this lexer (psqlscan.l) after using another one.
    1289              :  *
    1290              :  * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
    1291              :  * state, because we'd never switch to another lexer in a different state.
    1292              :  * However, we don't want to reset e.g. paren_depth, so this can't be
    1293              :  * the same as psql_scan_reset().
    1294              :  *
    1295              :  * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
    1296              :  * must be a superset of this.
    1297              :  *
    1298              :  * Note: it seems likely that other lexers could just assign INITIAL for
    1299              :  * themselves, since that probably has the value zero in every flex-generated
    1300              :  * lexer.  But let's not assume that.
    1301              :  */
    1302              : void
    1303              : psql_scan_reselect_sql_lexer(PsqlScanState state)
    1304       138436 : {
    1305              :     state->start_state = INITIAL;
    1306       138436 : }
    1307       138436 : 
    1308              : /*
    1309              :  * Return true if lexer is currently in an "inside quotes" state.
    1310              :  *
    1311              :  * This is pretty grotty but is needed to preserve the old behavior
    1312              :  * that mainloop.c drops blank lines not inside quotes without even
    1313              :  * echoing them.
    1314              :  */
    1315              : bool
    1316              : psql_scan_in_quote(PsqlScanState state)
    1317        75664 : {
    1318              :     return state->start_state != INITIAL &&
    1319        76156 :         state->start_state != xqs;
    1320          492 : }
    1321              : 
    1322              : /*
    1323              :  * Return the current scanning location (end+1 of last scanned token),
    1324              :  * as a line number counted from 1 and an offset from string start.
    1325              :  *
    1326              :  * This considers only the outermost input string, and therefore is of
    1327              :  * limited use for programs that use psqlscan_push_new_buffer().
    1328              :  *
    1329              :  * It would be a bit easier probably to use "%option yylineno" to count
    1330              :  * lines, but the flex manual says that has a performance cost, and only
    1331              :  * a minority of programs using psqlscan have need for this functionality.
    1332              :  * So we implement it ourselves without adding overhead to the lexer itself.
    1333              :  */
    1334              : void
    1335              : psql_scan_get_location(PsqlScanState state,
    1336         1737 :                        int *lineno, int *offset)
    1337              : {
    1338              :     const char *line_end;
    1339              : 
    1340              :     /*
    1341              :      * We rely on flex's having stored a NUL after the current token in
    1342              :      * scanbuf.  Therefore we must specially handle the state before yylex()
    1343              :      * has been called, when obviously that won't have happened yet.
    1344              :      */
    1345              :     if (state->cur_line_no == 0)
    1346         1737 :     {
    1347              :         *lineno = 1;
    1348            0 :         *offset = 0;
    1349            0 :         return;
    1350            0 :     }
    1351              : 
    1352              :     /*
    1353              :      * Advance cur_line_no/cur_line_ptr past whatever has been lexed so far.
    1354              :      * Doing this prevents repeated calls from being O(N^2) for long inputs.
    1355              :      */
    1356              :     while ((line_end = strchr(state->cur_line_ptr, '\n')) != NULL)
    1357         2210 :     {
    1358              :         state->cur_line_no++;
    1359          473 :         state->cur_line_ptr = line_end + 1;
    1360          473 :     }
    1361              :     state->cur_line_ptr += strlen(state->cur_line_ptr);
    1362         1737 : 
    1363              :     /* Report current location. */
    1364              :     *lineno = state->cur_line_no;
    1365         1737 :     *offset = state->cur_line_ptr - state->scanbuf;
    1366         1737 : }
    1367              : 
    1368              : /*
    1369              :  * Push the given string onto the stack of stuff to scan.
    1370              :  *
    1371              :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1372              :  */
    1373              : void
    1374              : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
    1375          669 :                          const char *varname)
    1376              : {
    1377              :     StackElem  *stackelem;
    1378              : 
    1379              :     stackelem = pg_malloc_object(StackElem);
    1380          669 : 
    1381              :     /*
    1382              :      * In current usage, the passed varname points at the current flex input
    1383              :      * buffer; we must copy it before calling psqlscan_prepare_buffer()
    1384              :      * because that will change the buffer state.
    1385              :      */
    1386              :     stackelem->varname = varname ? pg_strdup(varname) : NULL;
    1387          669 : 
    1388              :     stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
    1389          669 :                                              &stackelem->bufstring);
    1390              :     state->curline = stackelem->bufstring;
    1391          669 :     if (state->safe_encoding)
    1392          669 :     {
    1393              :         stackelem->origstring = NULL;
    1394          669 :         state->refline = stackelem->bufstring;
    1395          669 :     }
    1396              :     else
    1397              :     {
    1398              :         stackelem->origstring = pg_strdup(newstr);
    1399            0 :         state->refline = stackelem->origstring;
    1400            0 :     }
    1401              :     stackelem->next = state->buffer_stack;
    1402          669 :     state->buffer_stack = stackelem;
    1403          669 : }
    1404          669 : 
    1405              : /*
    1406              :  * Pop the topmost buffer stack item (there must be one!)
    1407              :  *
    1408              :  * NB: after this, the flex input state is unspecified; caller must
    1409              :  * switch to an appropriate buffer to continue lexing.
    1410              :  * See psqlscan_select_top_buffer().
    1411              :  */
    1412              : void
    1413              : psqlscan_pop_buffer_stack(PsqlScanState state)
    1414          669 : {
    1415              :     StackElem  *stackelem = state->buffer_stack;
    1416          669 : 
    1417              :     state->buffer_stack = stackelem->next;
    1418          669 :     yy_delete_buffer(stackelem->buf, state->scanner);
    1419          669 :     free(stackelem->bufstring);
    1420          669 :     if (stackelem->origstring)
    1421          669 :         free(stackelem->origstring);
    1422            0 :     if (stackelem->varname)
    1423          669 :         free(stackelem->varname);
    1424          669 :     free(stackelem);
    1425          669 : }
    1426          669 : 
    1427              : /*
    1428              :  * Select the topmost surviving buffer as the active input.
    1429              :  */
    1430              : void
    1431              : psqlscan_select_top_buffer(PsqlScanState state)
    1432          669 : {
    1433              :     StackElem  *stackelem = state->buffer_stack;
    1434          669 : 
    1435              :     if (stackelem != NULL)
    1436          669 :     {
    1437              :         yy_switch_to_buffer(stackelem->buf, state->scanner);
    1438            0 :         state->curline = stackelem->bufstring;
    1439            0 :         state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
    1440            0 :     }
    1441              :     else
    1442              :     {
    1443              :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1444          669 :         state->curline = state->scanbuf;
    1445          669 :         state->refline = state->scanline;
    1446          669 :     }
    1447              : }
    1448          669 : 
    1449              : /*
    1450              :  * Check if specified variable name is the source for any string
    1451              :  * currently being scanned
    1452              :  */
    1453              : bool
    1454              : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
    1455          669 : {
    1456              :     StackElem  *stackelem;
    1457              : 
    1458              :     for (stackelem = state->buffer_stack;
    1459          669 :          stackelem != NULL;
    1460          669 :          stackelem = stackelem->next)
    1461            0 :     {
    1462              :         if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
    1463            0 :             return true;
    1464            0 :     }
    1465              :     return false;
    1466          669 : }
    1467              : 
    1468              : /*
    1469              :  * Set up a flex input buffer to scan the given data.  We always make a
    1470              :  * copy of the data.  If working in an unsafe encoding, the copy has
    1471              :  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
    1472              :  *
    1473              :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1474              :  */
    1475              : YY_BUFFER_STATE
    1476              : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
    1477       398355 :                         char **txtcopy)
    1478              : {
    1479              :     char       *newtxt;
    1480              : 
    1481              :     /* Flex wants two \0 characters after the actual data */
    1482              :     newtxt = pg_malloc_array(char, (len + 2));
    1483       398355 :     *txtcopy = newtxt;
    1484       398355 :     newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
    1485       398355 : 
    1486              :     if (state->safe_encoding)
    1487       398355 :         memcpy(newtxt, txt, len);
    1488       398215 :     else
    1489              :     {
    1490              :         /* Gotta do it the hard way */
    1491              :         int         i = 0;
    1492          140 : 
    1493              :         while (i < len)
    1494          808 :         {
    1495              :             int         thislen = PQmblen(txt + i, state->encoding);
    1496          668 : 
    1497              :             /* first byte should always be okay... */
    1498              :             newtxt[i] = txt[i];
    1499          668 :             i++;
    1500          668 :             while (--thislen > 0 && i < len)
    1501          808 :                 newtxt[i++] = (char) 0xFF;
    1502          140 :         }
    1503              :     }
    1504              : 
    1505              :     return yy_scan_buffer(newtxt, len + 2, state->scanner);
    1506       398355 : }
    1507              : 
    1508              : /*
    1509              :  * psqlscan_emit() --- body for ECHO macro
    1510              :  *
    1511              :  * NB: this must be used for ALL and ONLY the text copied from the flex
    1512              :  * input data.  If you pass it something that is not part of the yytext
    1513              :  * string, you are making a mistake.  Internally generated text can be
    1514              :  * appended directly to state->output_buf.
    1515              :  */
    1516              : void
    1517              : psqlscan_emit(PsqlScanState state, const char *txt, int len)
    1518      5100502 : {
    1519              :     PQExpBuffer output_buf = state->output_buf;
    1520      5100502 : 
    1521              :     if (state->safe_encoding)
    1522      5100502 :         appendBinaryPQExpBuffer(output_buf, txt, len);
    1523      5100026 :     else
    1524              :     {
    1525              :         /* Gotta do it the hard way */
    1526              :         const char *reference = state->refline;
    1527          476 :         int         i;
    1528              : 
    1529              :         reference += (txt - state->curline);
    1530          476 : 
    1531              :         for (i = 0; i < len; i++)
    1532         1277 :         {
    1533              :             char        ch = txt[i];
    1534          801 : 
    1535              :             if (ch == (char) 0xFF)
    1536          801 :                 ch = reference[i];
    1537          140 :             appendPQExpBufferChar(output_buf, ch);
    1538          801 :         }
    1539              :     }
    1540              : }
    1541      5100502 : 
    1542              : /*
    1543              :  * psqlscan_extract_substring --- fetch value of (part of) the current token
    1544              :  *
    1545              :  * This is like psqlscan_emit(), except that the data is returned as a
    1546              :  * malloc'd string rather than being pushed directly to state->output_buf.
    1547              :  */
    1548              : char *
    1549              : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
    1550         2670 : {
    1551              :     char       *result = pg_malloc_array(char, (len + 1));
    1552         2670 : 
    1553              :     if (state->safe_encoding)
    1554         2670 :         memcpy(result, txt, len);
    1555         2670 :     else
    1556              :     {
    1557              :         /* Gotta do it the hard way */
    1558              :         const char *reference = state->refline;
    1559            0 :         int         i;
    1560              : 
    1561              :         reference += (txt - state->curline);
    1562            0 : 
    1563              :         for (i = 0; i < len; i++)
    1564            0 :         {
    1565              :             char        ch = txt[i];
    1566            0 : 
    1567              :             if (ch == (char) 0xFF)
    1568            0 :                 ch = reference[i];
    1569            0 :             result[i] = ch;
    1570            0 :         }
    1571              :     }
    1572              :     result[len] = '\0';
    1573         2670 :     return result;
    1574         2670 : }
    1575              : 
    1576              : /*
    1577              :  * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
    1578              :  *
    1579              :  * If the variable name is found, escape its value using the appropriate
    1580              :  * quoting method and emit the value to output_buf.  (Since the result is
    1581              :  * surely quoted, there is never any reason to rescan it.)  If we don't
    1582              :  * find the variable or escaping fails, emit the token as-is.
    1583              :  */
    1584              : void
    1585              : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
    1586          519 :                          PsqlScanQuoteType quote)
    1587              : {
    1588              :     char       *varname;
    1589              :     char       *value;
    1590              : 
    1591              :     /* Variable lookup. */
    1592              :     varname = psqlscan_extract_substring(state, txt + 2, len - 3);
    1593          519 :     if (state->callbacks->get_variable)
    1594          519 :         value = state->callbacks->get_variable(varname, quote,
    1595          519 :                                                state->cb_passthrough);
    1596              :     else
    1597              :         value = NULL;
    1598            0 :     free(varname);
    1599          519 : 
    1600              :     if (value)
    1601          519 :     {
    1602              :         /* Emit the suitably-escaped value */
    1603              :         appendPQExpBufferStr(state->output_buf, value);
    1604          491 :         free(value);
    1605          491 :     }
    1606              :     else
    1607              :     {
    1608              :         /* Emit original token as-is */
    1609              :         psqlscan_emit(state, txt, len);
    1610           28 :     }
    1611              : }
    1612          519 : 
    1613              : void
    1614              : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
    1615           16 : {
    1616              :     char       *varname;
    1617              :     char       *value;
    1618              : 
    1619              :     varname = psqlscan_extract_substring(state, txt + 3, len - 4);
    1620           16 :     if (state->callbacks->get_variable)
    1621           16 :         value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
    1622           16 :                                                state->cb_passthrough);
    1623              :     else
    1624              :         value = NULL;
    1625            0 :     free(varname);
    1626           16 : 
    1627              :     if (value != NULL)
    1628           16 :     {
    1629              :         appendPQExpBufferStr(state->output_buf, "TRUE");
    1630            7 :         free(value);
    1631            7 :     }
    1632              :     else
    1633              :     {
    1634              :         appendPQExpBufferStr(state->output_buf, "FALSE");
    1635            9 :     }
    1636              : }
    1637           16 : /* END: function "psqlscan_test_variable" */
        

Generated by: LCOV version 2.0-1