LCOV - code coverage report
Current view: top level - src/fe_utils - psqlscan.l (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 170 226 75.2 %
Date: 2024-05-08 14:10:47 Functions: 18 18 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : %top{
       2             : /*-------------------------------------------------------------------------
       3             :  *
       4             :  * psqlscan.l
       5             :  *    lexical scanner for SQL commands
       6             :  *
       7             :  * This lexer used to be part of psql, and that heritage is reflected in
       8             :  * the file name as well as function and typedef names, though it can now
       9             :  * be used by other frontend programs as well.  It's also possible to extend
      10             :  * this lexer with a compatible add-on lexer to handle program-specific
      11             :  * backslash commands.
      12             :  *
      13             :  * This code is mainly concerned with determining where the end of a SQL
      14             :  * statement is: we are looking for semicolons that are not within quotes,
      15             :  * comments, or parentheses.  The most reliable way to handle this is to
      16             :  * borrow the backend's flex lexer rules, lock, stock, and barrel.  The rules
      17             :  * below are (except for a few) the same as the backend's, but their actions
      18             :  * are just ECHO whereas the backend's actions generally do other things.
      19             :  *
      20             :  * XXX The rules in this file must be kept in sync with the backend lexer!!!
      21             :  *
      22             :  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
      23             :  *
      24             :  * See psqlscan_int.h for additional commentary.
      25             :  *
      26             :  *
      27             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      28             :  * Portions Copyright (c) 1994, Regents of the University of California
      29             :  *
      30             :  * IDENTIFICATION
      31             :  *    src/fe_utils/psqlscan.l
      32             :  *
      33             :  *-------------------------------------------------------------------------
      34             :  */
      35             : #include "postgres_fe.h"
      36             : 
      37             : #include "common/logging.h"
      38             : #include "fe_utils/psqlscan.h"
      39             : 
      40             : #include "libpq-fe.h"
      41             : }
      42             : 
      43             : %{
      44             : 
      45             : /* LCOV_EXCL_START */
      46             : 
      47             : #include "fe_utils/psqlscan_int.h"
      48             : 
      49             : /*
      50             :  * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
      51             :  * doesn't presently make use of that argument, so just declare it as int.
      52             :  */
      53             : typedef int YYSTYPE;
      54             : 
      55             : /*
      56             :  * Set the type of yyextra; we use it as a pointer back to the containing
      57             :  * PsqlScanState.
      58             :  */
      59             : #define YY_EXTRA_TYPE PsqlScanState
      60             : 
      61             : 
      62             : /* Return values from yylex() */
      63             : #define LEXRES_EOL          0   /* end of input */
      64             : #define LEXRES_SEMI         1   /* command-terminating semicolon found */
      65             : #define LEXRES_BACKSLASH    2   /* backslash command start */
      66             : 
      67             : 
      68             : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)  /* overrides flex's default ECHO; needs cur_state in scope */
      69             : 
      70             : /*
      71             :  * Work around a bug in flex 2.5.35: it emits a couple of functions that
      72             :  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
      73             :  * this would cause warnings.  Providing our own declarations should be
      74             :  * harmless even when the bug gets fixed.
      75             :  */
      76             : extern int  psql_yyget_column(yyscan_t yyscanner);
      77             : extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
      78             : 
      79             : %}
      80             : 
      81             : %option reentrant
      82             : %option bison-bridge
      83             : %option 8bit
      84             : %option never-interactive
      85             : %option nodefault
      86             : %option noinput
      87             : %option nounput
      88             : %option noyywrap
      89             : %option warn
      90             : %option prefix="psql_yy"
      91             : 
      92             : /*
      93             :  * All of the following definitions and rules should exactly match
      94             :  * src/backend/parser/scan.l so far as the flex patterns are concerned.
      95             :  * The rule bodies are just ECHO as opposed to what the backend does,
      96             :  * however.  (But be sure to duplicate code that affects the lexing process,
      97             :  * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
      98             :  * whereas scan.l has a separate one for each exclusive state.
      99             :  */
     100             : 
     101             : /*
     102             :  * OK, here is a short description of lex/flex rules behavior.
     103             :  * The longest pattern which matches an input string is always chosen.
     104             :  * For equal-length patterns, the first occurring in the rules list is chosen.
     105             :  * INITIAL is the starting state, to which all non-conditional rules apply.
     106             :  * Exclusive states change parsing rules while the state is active.  When in
     107             :  * an exclusive state, only those rules defined for that state apply.
     108             :  *
     109             :  * We use exclusive states for quoted strings, extended comments,
     110             :  * and to eliminate parsing troubles for numeric strings.
     111             :  * Exclusive states:
     112             :  *  <xb> bit string literal
     113             :  *  <xc> extended C-style comments
     114             :  *  <xd> delimited identifiers (double-quoted identifiers)
     115             :  *  <xh> hexadecimal byte string
     116             :  *  <xq> standard quoted strings
     117             :  *  <xqs> quote stop (detect continued strings)
     118             :  *  <xe> extended quoted strings (support backslash escape sequences)
     119             :  *  <xdolq> $foo$ quoted strings
     120             :  *  <xui> quoted identifier with Unicode escapes
     121             :  *  <xus> quoted string with Unicode escapes
     122             :  *
     123             :  * Note: we intentionally don't mimic the backend's <xeu> state; we have
     124             :  * no need to distinguish it from <xe> state, and no good way to get out
     125             :  * of it in error cases.  The backend just throws yyerror() in those
     126             :  * cases, but that's not an option here.
     127             :  */
     128             : 
     129             : %x xb
     130             : %x xc
     131             : %x xd
     132             : %x xh
     133             : %x xq
     134             : %x xqs
     135             : %x xe
     136             : %x xdolq
     137             : %x xui
     138             : %x xus
     139             : 
     140             : /*
     141             :  * In order to make the world safe for Windows and Mac clients as well as
     142             :  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
     143             :  * sequence will be seen as two successive newlines, but that doesn't cause
     144             :  * any problems.  Comments that start with -- and extend to the next
     145             :  * newline are treated as equivalent to a single whitespace character.
     146             :  *
     147             :  * NOTE a fine point: if there is no newline following --, we will absorb
     148             :  * everything to the end of the input as a comment.  This is correct.  Older
     149             :  * versions of Postgres failed to recognize -- as a comment if the input
     150             :  * did not end with a newline.
     151             :  *
     152             :  * non_newline_space tracks all space characters except newlines.
     153             :  *
     154             :  * XXX if you change the set of whitespace characters, fix scanner_isspace()
     155             :  * to agree.
     156             :  */
     157             : 
     158             : space               [ \t\n\r\f\v]
     159             : non_newline_space   [ \t\f\v]
     160             : newline             [\n\r]
     161             : non_newline         [^\n\r]
     162             : 
     163             : comment         ("--"{non_newline}*)
     164             : 
     165             : whitespace      ({space}+|{comment})
     166             : 
     167             : /*
     168             :  * SQL requires at least one newline in the whitespace separating
     169             :  * string literals that are to be concatenated.  Silly, but who are we
     170             :  * to argue?  Note that {whitespace_with_newline} should not have * after
     171             :  * it, whereas {whitespace} should generally have a * after it...
     172             :  */
     173             : 
     174             : special_whitespace      ({space}+|{comment}{newline})
     175             : non_newline_whitespace  ({non_newline_space}|{comment})
     176             : whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
     177             : 
     178             : quote           '
     179             : /* If we see {quote} then {quotecontinue}, the quoted string continues */
     180             : quotecontinue   {whitespace_with_newline}{quote}
     181             : 
     182             : /*
     183             :  * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
     184             :  * {quotecontinue}.  It might seem that this could just be {whitespace}*,
     185             :  * but if there's a dash after {whitespace_with_newline}, it must be consumed
     186             :  * to see if there's another dash --- which would start a {comment} and thus
     187             :  * allow continuation of the {quotecontinue} token.
     188             :  */
     189             : quotecontinuefail   {whitespace}*"-"?
     190             : 
     191             : /* Bit string
     192             :  * It is tempting to scan the string for only those characters
     193             :  * which are allowed. However, this leads to silently swallowed
     194             :  * characters if illegal characters are included in the string.
     195             :  * For example, if xbinside is [01] then B'ABCD' is interpreted
     196             :  * as a zero-length string, and the ABCD' is lost!
     197             :  * Better to pass the string forward and let the input routines
     198             :  * validate the contents.
     199             :  */
     200             : xbstart         [bB]{quote}
     201             : xbinside        [^']*
     202             : 
     203             : /* Hexadecimal byte string */
     204             : xhstart         [xX]{quote}
     205             : xhinside        [^']*
     206             : 
     207             : /* National character */
     208             : xnstart         [nN]{quote}
     209             : 
     210             : /* Quoted string that allows backslash escapes */
     211             : xestart         [eE]{quote}
     212             : xeinside        [^\\']+
     213             : xeescape        [\\][^0-7]
     214             : xeoctesc        [\\][0-7]{1,3}
     215             : xehexesc        [\\]x[0-9A-Fa-f]{1,2}
     216             : xeunicode       [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
     217             : xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
     218             : 
     219             : /* Extended quote
     220             :  * xqdouble implements embedded quote, ''''
     221             :  */
     222             : xqstart         {quote}
     223             : xqdouble        {quote}{quote}
     224             : xqinside        [^']+
     225             : 
     226             : /* $foo$ style quotes ("dollar quoting")
     227             :  * The quoted string starts with $foo$ where "foo" is an optional string
     228             :  * in the form of an identifier, except that it may not contain "$",
     229             :  * and extends to the first occurrence of an identical string.
     230             :  * There is *no* processing of the quoted text.
     231             :  *
     232             :  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
     233             :  * fails to match its trailing "$".
     234             :  */
     235             : dolq_start      [A-Za-z\200-\377_]
     236             : dolq_cont       [A-Za-z\200-\377_0-9]
     237             : dolqdelim       \$({dolq_start}{dolq_cont}*)?\$
     238             : dolqfailed      \${dolq_start}{dolq_cont}*
     239             : dolqinside      [^$]+
     240             : 
     241             : /* Double quote
     242             :  * Allows embedded spaces and other special characters in identifiers.
     243             :  */
     244             : dquote          \"
     245             : xdstart         {dquote}
     246             : xdstop          {dquote}
     247             : xddouble        {dquote}{dquote}
     248             : xdinside        [^"]+
     249             : 
     250             : /* Quoted identifier with Unicode escapes */
     251             : xuistart        [uU]&{dquote}
     252             : 
     253             : /* Quoted string with Unicode escapes */
     254             : xusstart        [uU]&{quote}
     255             : 
     256             : /* error rule to avoid backup */
     257             : xufailed        [uU]&
     258             : 
     259             : 
     260             : /* C-style comments
     261             :  *
     262             :  * The "extended comment" syntax closely resembles allowable operator syntax.
     263             :  * The tricky part here is to get lex to recognize a string starting with
     264             :  * slash-star as a comment, when interpreting it as an operator would produce
     265             :  * a longer match --- remember lex will prefer a longer match!  Also, if we
     266             :  * have something like plus-slash-star, lex will think this is a 3-character
     267             :  * operator whereas we want to see it as a + operator and a comment start.
     268             :  * The solution is two-fold:
     269             :  * 1. append {op_chars}* to xcstart so that it matches as much text as
     270             :  *    {operator} would. Then the tie-breaker (first matching rule of same
     271             :  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
     272             :  *    in case it contains a star-slash that should terminate the comment.
     273             :  * 2. In the operator rule, check for slash-star within the operator, and
     274             :  *    if found throw it back with yyless().  This handles the plus-slash-star
     275             :  *    problem.
     276             :  * Dash-dash comments have similar interactions with the operator rule.
     277             :  */
     278             : xcstart         \/\*{op_chars}*
     279             : xcstop          \*+\/
     280             : xcinside        [^*/]+
     281             : 
     282             : ident_start     [A-Za-z\200-\377_]
     283             : ident_cont      [A-Za-z\200-\377_0-9\$]
     284             : 
     285             : identifier      {ident_start}{ident_cont}*
     286             : 
     287             : /* Assorted special-case operators and operator-like tokens */
     288             : typecast        "::"
     289             : dot_dot         \.\.
     290             : colon_equals    ":="
     291             : 
     292             : /*
     293             :  * These operator-like tokens (unlike the above ones) also match the {operator}
     294             :  * rule, which means that they might be overridden by a longer match if they
     295             :  * are followed by a comment start or a + or - character. Accordingly, if you
     296             :  * add to this list, you must also add corresponding code to the {operator}
     297             :  * block to return the correct token in such cases. (This is not needed in
     298             :  * psqlscan.l since the token value is ignored there.)
     299             :  */
     300             : equals_greater  "=>"
     301             : less_equals     "<="
     302             : greater_equals  ">="
     303             : less_greater    "<>"
     304             : not_equals      "!="
     305             : 
     306             : /*
     307             :  * "self" is the set of chars that should be returned as single-character
     308             :  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
     309             :  * which can be one or more characters long (but if a single-char token
     310             :  * appears in the "self" set, it is not to be returned as an Op).  Note
     311             :  * that the sets overlap, but each has some chars that are not in the other.
     312             :  *
     313             :  * If you change either set, adjust the character lists appearing in the
     314             :  * rule for "operator"!
     315             :  */
     316             : self            [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
     317             : op_chars        [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
     318             : operator        {op_chars}+
     319             : 
     320             : /*
     321             :  * Numbers
     322             :  *
     323             :  * Unary minus is not part of a number here.  Instead we pass it separately to
     324             :  * the parser, and there it gets coerced via doNegate().
     325             :  *
     326             :  * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
     327             :  *
     328             :  * {realfail} is added to prevent the need for scanner
     329             :  * backup when the {real} rule fails to match completely.
     330             :  */
     331             : decdigit        [0-9]
     332             : hexdigit        [0-9A-Fa-f]
     333             : octdigit        [0-7]
     334             : bindigit        [0-1]
     335             : 
     336             : decinteger      {decdigit}(_?{decdigit})*
     337             : hexinteger      0[xX](_?{hexdigit})+
     338             : octinteger      0[oO](_?{octdigit})+
     339             : bininteger      0[bB](_?{bindigit})+
     340             : 
     341             : hexfail         0[xX]_?
     342             : octfail         0[oO]_?
     343             : binfail         0[bB]_?
     344             : 
     345             : numeric         (({decinteger}\.{decinteger}?)|(\.{decinteger}))
     346             : numericfail     {decdigit}+\.\.
     347             : 
     348             : real            ({decinteger}|{numeric})[Ee][-+]?{decinteger}
     349             : realfail        ({decinteger}|{numeric})[Ee][-+]
     350             : 
     351             : decinteger_junk {decinteger}{ident_start}
     352             : hexinteger_junk {hexinteger}{ident_start}
     353             : octinteger_junk {octinteger}{ident_start}
     354             : bininteger_junk {bininteger}{ident_start}
     355             : numeric_junk    {numeric}{ident_start}
     356             : real_junk       {real}{ident_start}
     357             : 
     358             : param           \${decinteger}
     359             : param_junk      \${decinteger}{ident_start}
     360             : 
     361             : /* psql-specific: characters allowed in variable names */
     362             : variable_char   [A-Za-z\200-\377_0-9]
     363             : 
     364             : other           .
     365             : 
     366             : /*
     367             :  * Dollar quoted strings are totally opaque, and no escaping is done on them.
     368             :  * Other quoted strings must allow some special characters such as single-quote
     369             :  *  and newline.
     370             :  * Embedded single-quotes are implemented both in the SQL standard
     371             :  *  style of two adjacent single quotes "''" and in the Postgres/Java style
     372             :  *  of escaped-quote "\'".
     373             :  * Other embedded escaped characters are matched explicitly and the leading
     374             :  *  backslash is dropped from the string.
     375             :  * Note that xcstart must appear before operator, as explained above!
     376             :  *  Also whitespace (comment) must appear before operator.
     377             :  */
     378             : 
     379             : %%
     380             : 
     381             : %{
     382             :         /* Declare some local variables inside yylex(), for convenience */
     383             :         PsqlScanState cur_state = yyextra;  /* yyextra has type YY_EXTRA_TYPE, i.e. PsqlScanState */
     384             :         PQExpBuffer output_buf = cur_state->output_buf;  /* its length is tested by the {whitespace} rule */
     385             : 
     386             :         /*
     387             :          * Force flex into the state indicated by start_state.  This has a
     388             :          * couple of purposes: it lets some of the functions below set a new
     389             :          * starting state without ugly direct access to flex variables, and it
     390             :          * allows us to transition from one flex lexer to another so that we
     391             :          * can lex different parts of the source string using separate lexers.
     392             :          */
     393             :         BEGIN(cur_state->start_state);  /* code placed here runs on each entry to yylex() */
     394             : %}
     395             : 
     396             : {whitespace}    {
     397             :                     /*
     398             :                      * Note that the whitespace rule includes both true
     399             :                      * whitespace and single-line ("--" style) comments.
     400             :                      * We suppress whitespace until we have collected some
     401             :                      * non-whitespace data.  (This interacts with some
     402             :                      * decisions in MainLoop(); see there for details.)
     403             :                      */
     404             :                     if (output_buf->len > 0)  /* something has already been collected */
     405             :                         ECHO;
     406             :                 }
     407             : 
     408             : {xcstart}       {
     409             :                     cur_state->xcdepth = 0;  /* outermost comment; nested openers increment this */
     410             :                     BEGIN(xc);
     411             :                     /* Put back any characters past slash-star; see above */
     412             :                     yyless(2);
     413             :                     ECHO;  /* after yyless(2) only the two-character opener is emitted */
     414             :                 }
     415             : 
     416             : <xc>{
     417             : {xcstart}       {
     418             :                     cur_state->xcdepth++;  /* nested comment: one more closer is needed */
     419             :                     /* Put back any characters past slash-star; see above */
     420             :                     yyless(2);
     421             :                     ECHO;
     422             :                 }
     423             : 
     424             : {xcstop}        {
     425             :                     if (cur_state->xcdepth <= 0)  /* this closer ends the outermost comment */
     426             :                         BEGIN(INITIAL);
     427             :                     else
     428             :                         cur_state->xcdepth--;  /* still inside an enclosing comment */
     429             :                     ECHO;
     430             :                 }
     431             : 
     432             : {xcinside}      {
     433             :                     ECHO;  /* run of characters other than star and slash */
     434             :                 }
     435             : 
     436             : {op_chars}      {
     437             :                     ECHO;  /* single operator character; presumably a lone slash or star, since xcinside covers the rest */
     438             :                 }
     439             : 
     440             : \*+             {
     441             :                     ECHO;  /* run of stars not followed by a slash */
     442             :                 }
     443             : } /* <xc> */
     444             : 
     445             : {xbstart}       {
     446             :                     BEGIN(xb);  /* bit string literal; contents are not validated by this lexer */
     447             :                     ECHO;
     448             :                 }
     449             : <xh>{xhinside}    |
     450             : <xb>{xbinside}    {
     451             :                     ECHO;  /* shared action: literal body text up to the next quote */
     452             :                 }
     453             : 
     454             : {xhstart}       {
     455             :                     /* Hexadecimal byte string literal.
     456             :                      * Unlike what earlier wording of this comment
     457             :                      * suggested, no leading "x" marker is added to the
     458             :                      * string here: this action only switches to the <xh>
     459             :                      * state and passes the matched text to ECHO.
     460             :                      */
     461             :                     BEGIN(xh);
     462             :                     ECHO;
     463             :                 }
     464             : 
     465             : {xnstart}       {
     466             :                     yyless(1);  /* eat only 'n' this time; the quote is put back */
     467             :                     ECHO;       /* no BEGIN here, so the quote is rescanned in the same state */
     468             :                 }
     469             : 
     470             : {xqstart}       {
     471             :                     if (cur_state->std_strings)
     472             :                         BEGIN(xq);  /* standard quoted string */
     473             :                     else
     474             :                         BEGIN(xe);  /* lexed like an extended string, so backslash escapes are recognized */
     475             :                     ECHO;
     476             :                 }
     477             : {xestart}       {
     478             :                     BEGIN(xe);  /* extended string: always <xe>, regardless of std_strings */
     479             :                     ECHO;
     480             :                 }
     481             : {xusstart}      {
     482             :                     BEGIN(xus);  /* quoted string with Unicode escapes */
     483             :                     ECHO;
     484             :                 }
     485             : 
     486             : <xb,xh,xq,xe,xus>{quote} {
     487             :                     /*
     488             :                      * When we are scanning a quoted string and see an end
     489             :                      * quote, we must look ahead for a possible continuation.
     490             :                      * If we don't see one, we know the end quote was in fact
     491             :                      * the end of the string.  To reduce the lexer table size,
     492             :                      * we use a single "xqs" state to do the lookahead for all
     493             :                      * types of strings.
     494             :                      */
     495             :                     cur_state->state_before_str_stop = YYSTATE;  /* restored if a continuation is found */
     496             :                     BEGIN(xqs);
     497             :                     ECHO;
     498             :                 }
     499             : <xqs>{quotecontinue} {
     500             :                     /*
     501             :                      * Found a quote continuation, so return to the in-quote
     502             :                      * state and continue scanning the literal.  Nothing is
     503             :                      * added to the literal's contents.
     504             :                      */
     505             :                     BEGIN(cur_state->state_before_str_stop);
     506             :                     ECHO;  /* the separating whitespace plus the reopening quote */
     507             :                 }
     508             : <xqs>{quotecontinuefail} |
     509             : <xqs>{other}  {
     510             :                     /*
     511             :                      * Failed to see a quote continuation.  Throw back
     512             :                      * everything after the end quote and return to INITIAL,
     513             :                      * so that the text is rescanned as ordinary input.
     514             :                      */
     515             :                     yyless(0);
     516             :                     BEGIN(INITIAL);
     517             :                     /* There's nothing to echo ... */
     518             :                 }
     519             : 
     520             : <xq,xe,xus>{xqdouble} {
     521             :                     ECHO;  /* doubled quote: stays inside the literal */
     522             :                 }
     523             : <xq,xus>{xqinside}  {
     524             :                     ECHO;  /* run of non-quote characters */
     525             :                 }
     526             : <xe>{xeinside}  {
     527             :                     ECHO;  /* run of characters that are neither quote nor backslash */
     528             :                 }
     529             : <xe>{xeunicode} {
     530             :                     ECHO;  /* complete backslash-u or backslash-U escape */
     531             :                 }
     532             : <xe>{xeunicodefail}   {
     533             :                     ECHO;  /* incomplete Unicode escape: echoed all the same */
     534             :                 }
     535             : <xe>{xeescape}  {
     536             :                     ECHO;  /* backslash plus one non-octal character, e.g. an escaped quote */
     537             :                 }
     538             : <xe>{xeoctesc}  {
     539             :                     ECHO;  /* backslash plus one to three octal digits */
     540             :                 }
     541             : <xe>{xehexesc}  {
     542             :                     ECHO;  /* backslash, x, one or two hex digits */
     543             :                 }
     544             : <xe>.         {
     545             :                     /* This is only needed for \ just before EOF */
     546             :                     ECHO;
     547             :                 }
     548             : 
     549             : {dolqdelim}     {
     550             :                     cur_state->dolqstart = pg_strdup(yytext);  /* remember the opening delimiter; freed when its match is found */
     551             :                     BEGIN(xdolq);
     552             :                     ECHO;
     553             :                 }
     554             : {dolqfailed}    {
     555             :                     /* throw back all but the initial "$" */
     556             :                     yyless(1);
     557             :                     ECHO;  /* emits just the "$"; the rest is rescanned */
     558             :                 }
     559             : <xdolq>{dolqdelim} {
     560             :                     if (strcmp(yytext, cur_state->dolqstart) == 0)  /* same delimiter as the opener: literal ends */
     561             :                     {
     562             :                         free(cur_state->dolqstart);
     563             :                         cur_state->dolqstart = NULL;
     564             :                         BEGIN(INITIAL);
     565             :                     }
     566             :                     else
     567             :                     {
     568             :                         /*
     569             :                          * When we fail to match $...$ to dolqstart, transfer
     570             :                          * the $... part to the output, but put back the final
     571             :                          * $ for rescanning.  Consider $delim$...$junk$delim$
     572             :                          */
     573             :                         yyless(yyleng - 1);
     574             :                     }
     575             :                     ECHO;  /* full closing delimiter, or everything but the put-back "$" */
     576             :                 }
     577             : <xdolq>{dolqinside} {
     578             :                     ECHO;
     579             :                 }
     580             : <xdolq>{dolqfailed} {
     581             :                     ECHO;
     582             :                 }
     583             : <xdolq>.      {
     584             :                     /* This is only needed for $ inside the quoted text */
     585             :                     ECHO;
     586             :                 }
     587             : 
     588             : {xdstart}       {    /* enter the xd state; left again by <xd>{xdstop} */
     589             :                     BEGIN(xd);
     590             :                     ECHO;
     591             :                 }
     592             : {xuistart}      {    /* enter the xui state; left again by <xui>{dquote} */
     593             :                     BEGIN(xui);
     594             :                     ECHO;
     595             :                 }
     596             : <xd>{xdstop}  {
     597             :                     BEGIN(INITIAL);
     598             :                     ECHO;
     599             :                 }
     600             : <xui>{dquote} {
     601             :                     BEGIN(INITIAL);
     602             :                     ECHO;
     603             :                 }
     604             : <xd,xui>{xddouble}    {    /* listed separately from the terminators above, so it does not end the state */
     605             :                     ECHO;
     606             :                 }
     607             : <xd,xui>{xdinside}    {
     608             :                     ECHO;
     609             :                 }
     610             : 
     611             : {xufailed}  {
     612             :                     /* throw back all but the initial u/U */
     613             :                     yyless(1);
     614             :                     ECHO;    /* emits only the single character kept by yyless(1) */
     615             :                 }
     616             : 
     617             : {typecast}      {    /* this and the following named tokens get no special treatment: ECHO only */
     618             :                     ECHO;
     619             :                 }
     620             : 
     621             : {dot_dot}       {
     622             :                     ECHO;
     623             :                 }
     624             : 
     625             : {colon_equals}  {
     626             :                     ECHO;
     627             :                 }
     628             : 
     629             : {equals_greater} {
     630             :                     ECHO;
     631             :                 }
     632             : 
     633             : {less_equals}   {
     634             :                     ECHO;
     635             :                 }
     636             : 
     637             : {greater_equals} {
     638             :                     ECHO;
     639             :                 }
     640             : 
     641             : {less_greater}  {
     642             :                     ECHO;
     643             :                 }
     644             : 
     645             : {not_equals}    {
     646             :                     ECHO;
     647             :                 }
     648             : 
     649             :     /*
     650             :      * These rules are specific to psql --- they implement parenthesis
     651             :      * counting and detection of command-ending semicolon.  These must
     652             :      * appear before the {self} rule so that they take precedence over it.
     653             :      */
     654             : 
     655             : "("               {
     656             :                     cur_state->paren_depth++;    /* one level deeper; ';' is not a terminator while depth > 0 */
     657             :                     ECHO;
     658             :                 }
     659             : 
     660             : ")"               {
     661             :                     if (cur_state->paren_depth > 0)    /* an unmatched ')' must not drive the depth negative */
     662             :                         cur_state->paren_depth--;
     663             :                     ECHO;
     664             :                 }
     665             : 
     666             : ";"               {
     667             :                     ECHO;    /* emit first, so the semicolon is part of the output even when we return below */
     668             :                     if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)    /* outside parentheses and BEGIN blocks */
     669             :                     {
     670             :                         /* Terminate lexing temporarily */
     671             :                         cur_state->start_state = YY_START;
     672             :                         cur_state->identifier_count = 0;    /* the {identifier} rule restarts its tracking when this is 0 */
     673             :                         return LEXRES_SEMI;
     674             :                     }
     675             :                 }
     676             : 
     677             :     /*
     678             :      * psql-specific rules to handle backslash commands and variable
     679             :      * substitution.  We want these before {self}, also.
     680             :      */
     681             : 
     682             : "\\"[;:]      {
     683             :                     /* Force a semi-colon or colon into the query buffer */
     684             :                     psqlscan_emit(cur_state, yytext + 1, 1);    /* one character, skipping the backslash itself */
     685             :                     if (yytext[1] == ';')    /* as in the ";" rule, restart identifier tracking; no return here */
     686             :                         cur_state->identifier_count = 0;
     687             :                 }
     688             : 
     689             : "\\"          {    /* any other backslash; the backslash itself is not ECHOed */
     690             :                     /* Terminate lexing temporarily */
     691             :                     cur_state->start_state = YY_START;    /* record the current start condition before returning */
     692             :                     return LEXRES_BACKSLASH;
     693             :                 }
     694             : 
     695             : :{variable_char}+   {
     696             :                     /* Possible psql variable substitution */
     697             :                     char       *varname;
     698             :                     char       *value;
     699             : 
     700             :                     varname = psqlscan_extract_substring(cur_state,
     701             :                                                          yytext + 1,
     702             :                                                          yyleng - 1);    /* the name only: skip the leading ':' */
     703             :                     if (cur_state->callbacks->get_variable)    /* the lookup callback is optional */
     704             :                         value = cur_state->callbacks->get_variable(varname,
     705             :                                                                    PQUOTE_PLAIN,
     706             :                                                                    cur_state->cb_passthrough);
     707             :                     else
     708             :                         value = NULL;    /* no callback: handled the same as an unknown variable */
     709             : 
     710             :                     if (value)
     711             :                     {
     712             :                         /* It is a variable, check for recursion */
     713             :                         if (psqlscan_var_is_current_source(cur_state, varname))
     714             :                         {
     715             :                             /* Recursive expansion --- don't go there */
     716             :                             pg_log_warning("skipping recursive expansion of variable \"%s\"",
     717             :                                                               varname);
     718             :                             /* Instead copy the string as is */
     719             :                             ECHO;
     720             :                         }
     721             :                         else
     722             :                         {
     723             :                             /* OK, perform substitution */
     724             :                             psqlscan_push_new_buffer(cur_state, value, varname);
     725             :                             /* yy_scan_string already made buffer active */
     726             :                         }
     727             :                         free(value);    /* NOTE(review): freed even after the push, so the helper presumably copies it -- confirm */
     728             :                     }
     729             :                     else
     730             :                     {
     731             :                         /*
     732             :                          * if the variable doesn't exist we'll copy the string
     733             :                          * as is
     734             :                          */
     735             :                         ECHO;
     736             :                     }
     737             : 
     738             :                     free(varname);    /* released on every path; assumes psqlscan_extract_substring returns malloc'd storage */
     739             :                 }
     740             : 
     741             : :'{variable_char}+' {    /* :'name' -- the whole token, colon and quotes included, goes to the helper */
     742             :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     743             :                                              PQUOTE_SQL_LITERAL);
     744             :                 }
     745             : 
     746             : :\"{variable_char}+\" {    /* :"name" -- same helper, but with identifier quoting requested */
     747             :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     748             :                                              PQUOTE_SQL_IDENT);
     749             :                 }
     750             : 
     751             : :\{\?{variable_char}+\} {    /* :{?name} -- handled by psqlscan_test_variable (defined outside this view) */
     752             :                     psqlscan_test_variable(cur_state, yytext, yyleng);
     753             :                 }
     754             : 
     755             :     /*
     756             :      * These rules just avoid the need for scanner backup if one of the
     757             :      * three rules above fails to match completely.
     758             :      */
     759             : 
     760             : :'{variable_char}*  {    /* :' with no closing quote: not a variable reference */
     761             :                     /* Throw back everything but the colon */
     762             :                     yyless(1);
     763             :                     ECHO;    /* emits just the ':'; the rest is rescanned */
     764             :                 }
     765             : 
     766             : :\"{variable_char}*    {    /* :" with no closing quote */
     767             :                     /* Throw back everything but the colon */
     768             :                     yyless(1);
     769             :                     ECHO;
     770             :                 }
     771             : 
     772             : :\{\?{variable_char}*   {    /* :{? with no closing brace */
     773             :                     /* Throw back everything but the colon */
     774             :                     yyless(1);
     775             :                     ECHO;
     776             :                 }
     777             : :\{ {    /* :{ not followed by '?' */
     778             :                     /* Throw back everything but the colon */
     779             :                     yyless(1);
     780             :                     ECHO;
     781             :                 }
     782             : 
     783             :     /*
     784             :      * Back to backend-compatible rules.
     785             :      */
     786             : 
     787             : {self}          {    /* reached only for characters the psql-specific rules above did not claim */
     788             :                     ECHO;
     789             :                 }
     790             : 
     791             : {operator}      {
     792             :                     /*
     793             :                      * Check for embedded slash-star or dash-dash; those
     794             :                      * are comment starts, so operator must stop there.
     795             :                      * Note that slash-star or dash-dash at the first
     796             :                      * character will match a prior rule, not this one.
     797             :                      */
     798             :                     int         nchars = yyleng;    /* how many leading chars of yytext to keep as the operator */
     799             :                     char       *slashstar = strstr(yytext, "/*");
     800             :                     char       *dashdash = strstr(yytext, "--");
     801             : 
     802             :                     if (slashstar && dashdash)
     803             :                     {
     804             :                         /* if both appear, take the first one */
     805             :                         if (slashstar > dashdash)
     806             :                             slashstar = dashdash;
     807             :                     }
     808             :                     else if (!slashstar)    /* only dash-dash, or neither, was found */
     809             :                         slashstar = dashdash;
     810             :                     if (slashstar)    /* now points at the earliest comment start, if any */
     811             :                         nchars = slashstar - yytext;
     812             : 
     813             :                     /*
     814             :                      * For SQL compatibility, '+' and '-' cannot be the
     815             :                      * last char of a multi-char operator unless the operator
     816             :                      * contains chars that are not in SQL operators.
     817             :                      * The idea is to lex '=-' as two operators, but not
     818             :                      * to forbid operator names like '?-' that could not be
     819             :                      * sequences of SQL operators.
     820             :                      */
     821             :                     if (nchars > 1 &&
     822             :                         (yytext[nchars - 1] == '+' ||
     823             :                          yytext[nchars - 1] == '-'))
     824             :                     {
     825             :                         int         ic;
     826             : 
     827             :                         for (ic = nchars - 2; ic >= 0; ic--)    /* search the chars before the trailing +/- */
     828             :                         {
     829             :                             char c = yytext[ic];
     830             :                             if (c == '~' || c == '!' || c == '@' ||
     831             :                                 c == '#' || c == '^' || c == '&' ||
     832             :                                 c == '|' || c == '`' || c == '?' ||
     833             :                                 c == '%')
     834             :                                 break;
     835             :                         }
     836             :                         if (ic < 0)    /* the loop finished without hitting the break */
     837             :                         {
     838             :                             /*
     839             :                              * didn't find a qualifying character, so remove
     840             :                              * all trailing [+-]
     841             :                              */
     842             :                             do {
     843             :                                 nchars--;
     844             :                             } while (nchars > 1 &&    /* never shrink below one character */
     845             :                                  (yytext[nchars - 1] == '+' ||
     846             :                                   yytext[nchars - 1] == '-'));
     847             :                         }
     848             :                     }
     849             : 
     850             :                     if (nchars < yyleng)
     851             :                     {
     852             :                         /* Strip the unwanted chars from the token */
     853             :                         yyless(nchars);
     854             :                     }
     855             :                     ECHO;    /* emits the operator as trimmed above */
     856             :                 }
     857             : 
     858             : {param}         {    /* from here through {real_junk}: ECHO only, no lexer state is changed */
     859             :                     ECHO;
     860             :                 }
     861             : {param_junk}    {
     862             :                     ECHO;
     863             :                 }
     864             : 
     865             : {decinteger}    {
     866             :                     ECHO;
     867             :                 }
     868             : {hexinteger}    {
     869             :                     ECHO;
     870             :                 }
     871             : {octinteger}    {
     872             :                     ECHO;
     873             :                 }
     874             : {bininteger}    {
     875             :                     ECHO;
     876             :                 }
     877             : {hexfail}       {    /* the *fail and *_junk patterns are passed through as well; nothing is reported here */
     878             :                     ECHO;
     879             :                 }
     880             : {octfail}       {
     881             :                     ECHO;
     882             :                 }
     883             : {binfail}       {
     884             :                     ECHO;
     885             :                 }
     886             : {numeric}       {
     887             :                     ECHO;
     888             :                 }
     889             : {numericfail}   {    /* the one rule in this group that gives part of its match back */
     890             :                     /* throw back the .., and treat as integer */
     891             :                     yyless(yyleng - 2);
     892             :                     ECHO;
     893             :                 }
     894             : {real}          {
     895             :                     ECHO;
     896             :                 }
     897             : {realfail}      {
     898             :                     ECHO;
     899             :                 }
     900             : {decinteger_junk}   {
     901             :                     ECHO;
     902             :                 }
     903             : {hexinteger_junk}   {
     904             :                     ECHO;
     905             :                 }
     906             : {octinteger_junk}   {
     907             :                     ECHO;
     908             :                 }
     909             : {bininteger_junk}   {
     910             :                     ECHO;
     911             :                 }
     912             : {numeric_junk}  {
     913             :                     ECHO;
     914             :                 }
     915             : {real_junk}     {
     916             :                     ECHO;
     917             :                 }
     918             : 
     919             : 
     920             : {identifier}    {
     921             :                     /*
     922             :                      * We need to track if we are inside a BEGIN .. END block
     923             :                      * in a function definition, so that semicolons contained
     924             :                      * therein don't terminate the whole statement.  Short of
     925             :                      * writing a full parser here, the following heuristic
     926             :                      * should work.  First, we track whether the beginning of
     927             :                      * the statement matches CREATE [OR REPLACE]
     928             :                      * {FUNCTION|PROCEDURE}
     929             :                      */
     930             : 
     931             :                     if (cur_state->identifier_count == 0)    /* first identifier since the count was last reset */
     932             :                         memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
     933             : 
     934             :                     if (pg_strcasecmp(yytext, "create") == 0 ||
     935             :                         pg_strcasecmp(yytext, "function") == 0 ||
     936             :                         pg_strcasecmp(yytext, "procedure") == 0 ||
     937             :                         pg_strcasecmp(yytext, "or") == 0 ||
     938             :                         pg_strcasecmp(yytext, "replace") == 0)
     939             :                     {
     940             :                         if (cur_state->identifier_count < sizeof(cur_state->identifiers))    /* NOTE(review): sizeof as element count assumes a char array -- confirm in psqlscan_int.h */
     941             :                             cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);    /* store the keyword's first letter at this identifier's position */
     942             :                     }
     943             : 
     944             :                     cur_state->identifier_count++;    /* counts every identifier, keyword or not, so positions stay aligned */
     945             : 
     946             :                     if (cur_state->identifiers[0] == 'c' &&    /* recorded initials read "c f", "c p", "c o r f" or "c o r p" */
     947             :                         (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
     948             :                          (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
     949             :                           (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
     950             :                         cur_state->paren_depth == 0)    /* keywords inside parentheses are not counted */
     951             :                     {
     952             :                         if (pg_strcasecmp(yytext, "begin") == 0)
     953             :                             cur_state->begin_depth++;
     954             :                         else if (pg_strcasecmp(yytext, "case") == 0)
     955             :                         {
     956             :                             /*
     957             :                              * CASE also ends with END.  We only need to track
     958             :                              * this if we are already inside a BEGIN.
     959             :                              */
     960             :                             if (cur_state->begin_depth >= 1)
     961             :                                 cur_state->begin_depth++;
     962             :                         }
     963             :                         else if (pg_strcasecmp(yytext, "end") == 0)
     964             :                         {
     965             :                             if (cur_state->begin_depth > 0)    /* a stray END must not make the depth negative */
     966             :                                 cur_state->begin_depth--;
     967             :                         }
     968             :                     }
     969             : 
     970             :                     ECHO;    /* the identifier is always passed through, whatever was tracked above */
     971             :                 }
     972             : 
     973             : {other}         {    /* last pattern rule before <<EOF>>; its match is ECHOed like the rest */
     974             :                     ECHO;
     975             :                 }
     976             : 
     977             : <<EOF>>         {
     978             :                     if (cur_state->buffer_stack == NULL)    /* empty stack: this is the end of the real input, not of an expansion */
     979             :                     {
     980             :                         cur_state->start_state = YY_START;    /* psql_scan() switches on this to choose its result and prompt */
     981             :                         return LEXRES_EOL;      /* end of input reached */
     982             :                     }
     983             : 
     984             :                     /*
     985             :                      * We were expanding a variable, so pop the inclusion
     986             :                      * stack and keep lexing
     987             :                      */
     988             :                     psqlscan_pop_buffer_stack(cur_state);
     989             :                     psqlscan_select_top_buffer(cur_state);    /* no return: scanning continues with whatever input is selected */
     990             :                 }
     991             : 
     992             : %%
     993             : 
     994             : /* LCOV_EXCL_STOP */
     995             : 
     996             : /*
     997             :  * Create a lexer working state struct and return it (release it with psql_scan_destroy).
     998             :  *
     999             :  * callbacks is a struct of function pointers that encapsulate some
    1000             :  * behavior we need from the surrounding program.  This struct must
    1001             :  * remain valid for the lifespan of the PsqlScanState.
    1002             :  */
    1003             : PsqlScanState
    1004             : psql_scan_create(const PsqlScanCallbacks *callbacks)
    1005       14402 : {
    1006             :     PsqlScanState state;
    1007             : 
    1008             :     state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));    /* presumably zero-filled, per the helper's name -- confirm */
    1009       14402 : 
    1010             :     state->callbacks = callbacks;    /* only the pointer is kept, hence the lifetime rule above */
    1011       14402 : 
    1012             :     yylex_init(&state->scanner);    /* create the flex scanner instance owned by this state */
    1013       14402 : 
    1014             :     yyset_extra(state, state->scanner);    /* attach the state to the scanner as its "extra" data */
    1015       14402 : 
    1016             :     psql_scan_reset(state);    /* defined outside this view; presumably sets the initial lexer state */
    1017       14402 : 
    1018             :     return state;
    1019       14402 : }
    1020             : 
    1021             : /*
    1022             :  * Destroy a lexer working state struct, releasing all resources; state must not be used afterwards.
    1023             :  */
    1024             : void
    1025             : psql_scan_destroy(PsqlScanState state)
    1026       14304 : {
    1027             :     psql_scan_finish(state);    /* NOTE(review): presumably drops any scan still set up -- confirm in its definition */
    1028       14304 : 
    1029             :     psql_scan_reset(state);    /* NOTE(review): presumably releases per-statement data held in the state -- confirm */
    1030       14304 : 
    1031             :     yylex_destroy(state->scanner);    /* counterpart of yylex_init in psql_scan_create */
    1032       14304 : 
    1033             :     free(state);    /* the struct itself, allocated in psql_scan_create */
    1034       14304 : }
    1035       14304 : 
    1036             : /*
    1037             :  * Set the callback passthrough pointer for the lexer.
    1038             :  *
    1039             :  * This could have been integrated into psql_scan_create, but keeping it
    1040             :  * separate allows the application to change the pointer later, which might
    1041             :  * be useful.
    1042             :  */
    1043             : void
    1044             : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
    1045       13866 : {
    1046             :     state->cb_passthrough = passthrough;    /* stored as-is; the lexer hands it to the get_variable callback */
    1047       13866 : }
    1048       13866 : 
    1049             : /*
    1050             :  * Set up to perform lexing of the given input line.
    1051             :  *
    1052             :  * The text at *line, extending for line_len bytes, will be scanned by
    1053             :  * subsequent calls to the psql_scan routines.  psql_scan_finish should
    1054             :  * be called when scanning is complete.  Note that the lexer retains
    1055             :  * a pointer to the storage at *line --- this string must not be altered
    1056             :  * or freed until after psql_scan_finish is called.
    1057             :  *
    1058             :  * encoding is the libpq identifier for the character encoding in use,
    1059             :  * and std_strings says whether standard_conforming_strings is on.
    1060             :  */
    1061             : void
    1062             : psql_scan_setup(PsqlScanState state,
    1063      606338 :                 const char *line, int line_len,
    1064             :                 int encoding, bool std_strings)
    1065             : {
    1066             :     /* Mustn't be scanning already */
    1067             :     Assert(state->scanbufhandle == NULL);
    1068             :     Assert(state->buffer_stack == NULL);
    1069             : 
    1070             :     /* Do we need to hack the character set encoding? */
    1071             :     state->encoding = encoding;
    1072      606338 :     state->safe_encoding = pg_valid_server_encoding_id(encoding);    /* result is only stored here; its use is outside this view */
    1073      606338 : 
    1074             :     /* Save standard-strings flag as well */
    1075             :     state->std_strings = std_strings;
    1076      606338 : 
    1077             :     /* Set up flex input buffer with appropriate translation and padding */
    1078             :     state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
    1079      606338 :                                                    &state->scanbuf);    /* the helper also fills in state->scanbuf */
    1080             :     state->scanline = line;    /* the caller's pointer is retained, not copied (see the lifetime note above) */
    1081      606338 : 
    1082             :     /* Set lookaside data in case we have to map unsafe encoding */
    1083             :     state->curline = state->scanbuf;
    1084      606338 :     state->refline = state->scanline;
    1085      606338 : }
    1086      606338 : 
    1087             : /*
    1088             :  * Do lexical analysis of SQL command text.
    1089             :  *
    1090             :  * The text previously passed to psql_scan_setup is scanned, and appended
    1091             :  * (possibly with transformation) to query_buf.
    1092             :  *
    1093             :  * The return value indicates the condition that stopped scanning:
    1094             :  *
    1095             :  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
    1096             :  * transferred to query_buf.)  The command accumulated in query_buf should
    1097             :  * be executed, then clear query_buf and call again to scan the remainder
    1098             :  * of the line.
    1099             :  *
    1100             :  * PSCAN_BACKSLASH: found a backslash that starts a special command.
    1101             :  * Any previous data on the line has been transferred to query_buf.
    1102             :  * The caller will typically next apply a separate flex lexer to scan
    1103             :  * the special command.
    1104             :  *
    1105             :  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
    1106             :  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
    1107             :  *
    1108             :  * PSCAN_EOL: the end of the line was reached, and there is no lexical
    1109             :  * reason to consider the command incomplete.  The caller may or may not
    1110             :  * choose to send it.  *prompt is set to the appropriate prompt type if
    1111             :  * the caller chooses to collect more input.
    1112             :  *
    1113             :  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
    1114             :  * be called next, then the cycle may be repeated with a fresh input line.
    1115             :  *
    1116             :  * In all cases, *prompt is set to an appropriate prompt type code for the
    1117             :  * next line-input operation.
    1118             :  */
    1119             : PsqlScanResult
    1120             : psql_scan(PsqlScanState state,
    1121      929174 :           PQExpBuffer query_buf,
    1122             :           promptStatus_t *prompt)
    1123             : {
    1124             :     PsqlScanResult result;
    1125             :     int         lexresult;
    1126             : 
    1127             :     /* Must be scanning already */
    1128             :     Assert(state->scanbufhandle != NULL);
    1129             : 
    1130             :     /* Set current output target */
    1131             :     state->output_buf = query_buf;
    1132      929174 : 
    1133             :     /* Set input source */
    1134             :     if (state->buffer_stack != NULL)
    1135      929174 :         yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
    1136          90 :     else
    1137             :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1138      929084 : 
    1139             :     /* And lex. */
    1140             :     lexresult = yylex(NULL, state->scanner);
    1141      929174 : 
    1142             :     /*
    1143             :      * Check termination state and return appropriate result info.
    1144             :      */
    1145             :     switch (lexresult)
    1146      929174 :     {
    1147             :         case LEXRES_EOL:        /* end of input */
    1148      606122 :             switch (state->start_state)
    1149      606122 :             {
    1150             :                 case INITIAL:
    1151      560176 :                 case xqs:       /* we treat this like INITIAL */
    1152             :                     if (state->paren_depth > 0)
    1153      560176 :                     {
    1154             :                         result = PSCAN_INCOMPLETE;
    1155       44588 :                         *prompt = PROMPT_PAREN;
    1156       44588 :                     }
    1157             :                     else if (state->begin_depth > 0)
    1158      515588 :                     {
    1159             :                         result = PSCAN_INCOMPLETE;
    1160         452 :                         *prompt = PROMPT_CONTINUE;
    1161         452 :                     }
    1162             :                     else if (query_buf->len > 0)
    1163      515136 :                     {
    1164             :                         result = PSCAN_EOL;
    1165      100000 :                         *prompt = PROMPT_CONTINUE;
    1166      100000 :                     }
    1167             :                     else
    1168             :                     {
    1169             :                         /* never bother to send an empty buffer */
    1170             :                         result = PSCAN_INCOMPLETE;
    1171      415136 :                         *prompt = PROMPT_READY;
    1172      415136 :                     }
    1173             :                     break;
    1174      560176 :                 case xb:
    1175           0 :                     result = PSCAN_INCOMPLETE;
    1176           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1177           0 :                     break;
    1178           0 :                 case xc:
    1179         780 :                     result = PSCAN_INCOMPLETE;
    1180         780 :                     *prompt = PROMPT_COMMENT;
    1181         780 :                     break;
    1182         780 :                 case xd:
    1183          26 :                     result = PSCAN_INCOMPLETE;
    1184          26 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1185          26 :                     break;
    1186          26 :                 case xh:
    1187           0 :                     result = PSCAN_INCOMPLETE;
    1188           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1189           0 :                     break;
    1190           0 :                 case xe:
    1191         602 :                     result = PSCAN_INCOMPLETE;
    1192         602 :                     *prompt = PROMPT_SINGLEQUOTE;
    1193         602 :                     break;
    1194         602 :                 case xq:
    1195        9448 :                     result = PSCAN_INCOMPLETE;
    1196        9448 :                     *prompt = PROMPT_SINGLEQUOTE;
    1197        9448 :                     break;
    1198        9448 :                 case xdolq:
    1199       35090 :                     result = PSCAN_INCOMPLETE;
    1200       35090 :                     *prompt = PROMPT_DOLLARQUOTE;
    1201       35090 :                     break;
    1202       35090 :                 case xui:
    1203           0 :                     result = PSCAN_INCOMPLETE;
    1204           0 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1205           0 :                     break;
    1206           0 :                 case xus:
    1207           0 :                     result = PSCAN_INCOMPLETE;
    1208           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1209           0 :                     break;
    1210           0 :                 default:
    1211           0 :                     /* can't get here */
    1212             :                     fprintf(stderr, "invalid YY_START\n");
    1213           0 :                     exit(1);
    1214           0 :             }
    1215             :             break;
    1216      606122 :         case LEXRES_SEMI:       /* semicolon */
    1217      308878 :             result = PSCAN_SEMICOLON;
    1218      308878 :             *prompt = PROMPT_READY;
    1219      308878 :             break;
    1220      308878 :         case LEXRES_BACKSLASH:  /* backslash */
    1221       14174 :             result = PSCAN_BACKSLASH;
    1222       14174 :             *prompt = PROMPT_READY;
    1223       14174 :             break;
    1224       14174 :         default:
    1225           0 :             /* can't get here */
    1226             :             fprintf(stderr, "invalid yylex result\n");
    1227           0 :             exit(1);
    1228           0 :     }
    1229             : 
    1230             :     return result;
    1231      929174 : }
    1232             : 
    1233             : /*
    1234             :  * Clean up after scanning a string.  This flushes any unread input and
    1235             :  * releases resources (but not the PsqlScanState itself).  Note however
    1236             :  * that this does not reset the lexer scan state; that can be done by
    1237             :  * psql_scan_reset(), which is an orthogonal operation.
    1238             :  *
    1239             :  * It is legal to call this when not scanning anything (makes it easier
    1240             :  * to deal with error recovery).
    1241             :  */
    1242             : void
    1243             : psql_scan_finish(PsqlScanState state)
    1244      620542 : {
    1245             :     /* Drop any incomplete variable expansions. */
    1246             :     while (state->buffer_stack != NULL)
    1247      620542 :         psqlscan_pop_buffer_stack(state);
    1248           0 : 
    1249             :     /* Done with the outer scan buffer, too */
    1250             :     if (state->scanbufhandle)
    1251      620542 :         yy_delete_buffer(state->scanbufhandle, state->scanner);
    1252      606242 :     state->scanbufhandle = NULL;
    1253      620542 :     if (state->scanbuf)
    1254      620542 :         free(state->scanbuf);
    1255      606242 :     state->scanbuf = NULL;
    1256      620542 : }
    1257      620542 : 
    1258             : /*
    1259             :  * Reset lexer scanning state to start conditions.  This is appropriate
    1260             :  * for executing \r psql commands (or any other time that we discard the
    1261             :  * prior contents of query_buf).  It is not, however, necessary to do this
    1262             :  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
    1263             :  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
    1264             :  * conditions are returned.
    1265             :  *
    1266             :  * Note that this is unrelated to flushing unread input; that task is
    1267             :  * done by psql_scan_finish().
    1268             :  */
    1269             : void
    1270             : psql_scan_reset(PsqlScanState state)
    1271       29860 : {
    1272             :     state->start_state = INITIAL;
    1273       29860 :     state->paren_depth = 0;
    1274       29860 :     state->xcdepth = 0;          /* not really necessary */
    1275       29860 :     if (state->dolqstart)
    1276       29860 :         free(state->dolqstart);
    1277           0 :     state->dolqstart = NULL;
    1278       29860 :     state->identifier_count = 0;
    1279       29860 :     state->begin_depth = 0;
    1280       29860 : }
    1281       29860 : 
    1282             : /*
    1283             :  * Reselect this lexer (psqlscan.l) after using another one.
    1284             :  *
    1285             :  * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
    1286             :  * state, because we'd never switch to another lexer in a different state.
    1287             :  * However, we don't want to reset e.g. paren_depth, so this can't be
    1288             :  * the same as psql_scan_reset().
    1289             :  *
    1290             :  * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
    1291             :  * must be a superset of this.
    1292             :  *
    1293             :  * Note: it seems likely that other lexers could just assign INITIAL for
    1294             :  * themselves, since that probably has the value zero in every flex-generated
    1295             :  * lexer.  But let's not assume that.
    1296             :  */
    1297             : void
    1298             : psql_scan_reselect_sql_lexer(PsqlScanState state)
    1299       63544 : {
    1300             :     state->start_state = INITIAL;
    1301       63544 : }
    1302       63544 : 
    1303             : /*
    1304             :  * Return true if lexer is currently in an "inside quotes" state.
    1305             :  *
    1306             :  * This is pretty grotty but is needed to preserve the old behavior
    1307             :  * that mainloop.c drops blank lines not inside quotes without even
    1308             :  * echoing them.
    1309             :  */
    1310             : bool
    1311             : psql_scan_in_quote(PsqlScanState state)
    1312      128494 : {
    1313             :     return state->start_state != INITIAL &&
    1314      129410 :             state->start_state != xqs;
    1315         916 : }
    1316             : 
    1317             : /*
    1318             :  * Push the given string onto the stack of stuff to scan.
    1319             :  *
    1320             :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1321             :  */
    1322             : void
    1323             : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
    1324        1196 :                          const char *varname)
    1325             : {
    1326             :     StackElem  *stackelem;
    1327             : 
    1328             :     stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
    1329        1196 : 
    1330             :     /*
    1331             :      * In current usage, the passed varname points at the current flex input
    1332             :      * buffer; we must copy it before calling psqlscan_prepare_buffer()
    1333             :      * because that will change the buffer state.
    1334             :      */
    1335             :     stackelem->varname = varname ? pg_strdup(varname) : NULL;
    1336        1196 : 
    1337             :     stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
    1338        1196 :                                              &stackelem->bufstring);
    1339             :     state->curline = stackelem->bufstring;
    1340        1196 :     if (state->safe_encoding)
    1341        1196 :     {
    1342             :         stackelem->origstring = NULL;
    1343        1196 :         state->refline = stackelem->bufstring;
    1344        1196 :     }
    1345             :     else
    1346             :     {
    1347             :         stackelem->origstring = pg_strdup(newstr);
    1348           0 :         state->refline = stackelem->origstring;
    1349           0 :     }
    1350             :     stackelem->next = state->buffer_stack;
    1351        1196 :     state->buffer_stack = stackelem;
    1352        1196 : }
    1353        1196 : 
    1354             : /*
    1355             :  * Pop the topmost buffer stack item (there must be one!)
    1356             :  *
    1357             :  * NB: after this, the flex input state is unspecified; caller must
    1358             :  * switch to an appropriate buffer to continue lexing.
    1359             :  * See psqlscan_select_top_buffer().
    1360             :  */
    1361             : void
    1362             : psqlscan_pop_buffer_stack(PsqlScanState state)
    1363        1196 : {
    1364             :     StackElem  *stackelem = state->buffer_stack;
    1365        1196 : 
    1366             :     state->buffer_stack = stackelem->next;
    1367        1196 :     yy_delete_buffer(stackelem->buf, state->scanner);
    1368        1196 :     free(stackelem->bufstring);
    1369        1196 :     if (stackelem->origstring)
    1370        1196 :         free(stackelem->origstring);
    1371           0 :     if (stackelem->varname)
    1372        1196 :         free(stackelem->varname);
    1373        1196 :     free(stackelem);
    1374        1196 : }
    1375        1196 : 
    1376             : /*
    1377             :  * Select the topmost surviving buffer as the active input.
    1378             :  */
    1379             : void
    1380             : psqlscan_select_top_buffer(PsqlScanState state)
    1381        1196 : {
    1382             :     StackElem  *stackelem = state->buffer_stack;
    1383        1196 : 
    1384             :     if (stackelem != NULL)
    1385        1196 :     {
    1386             :         yy_switch_to_buffer(stackelem->buf, state->scanner);
    1387           0 :         state->curline = stackelem->bufstring;
    1388           0 :         state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
    1389           0 :     }
    1390             :     else
    1391             :     {
    1392             :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1393        1196 :         state->curline = state->scanbuf;
    1394        1196 :         state->refline = state->scanline;
    1395        1196 :     }
    1396             : }
    1397        1196 : 
    1398             : /*
    1399             :  * Check if specified variable name is the source for any string
    1400             :  * currently being scanned
    1401             :  */
    1402             : bool
    1403             : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
    1404        1196 : {
    1405             :     StackElem  *stackelem;
    1406             : 
    1407             :     for (stackelem = state->buffer_stack;
    1408        1196 :          stackelem != NULL;
    1409             :          stackelem = stackelem->next)
    1410           0 :     {
    1411             :         if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
    1412           0 :             return true;
    1413           0 :     }
    1414             :     return false;
    1415        1196 : }
    1416             : 
    1417             : /*
    1418             :  * Set up a flex input buffer to scan the given data.  We always make a
    1419             :  * copy of the data.  If working in an unsafe encoding, the copy has
    1420             :  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
    1421             :  *
    1422             :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1423             :  */
    1424             : YY_BUFFER_STATE
    1425             : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
    1426      607534 :                         char **txtcopy)
    1427             : {
    1428             :     char       *newtxt;
    1429             : 
    1430             :     /* Flex wants two \0 characters after the actual data */
    1431             :     newtxt = pg_malloc(len + 2);
    1432      607534 :     *txtcopy = newtxt;
    1433      607534 :     newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
    1434      607534 : 
    1435             :     if (state->safe_encoding)
    1436      607534 :         memcpy(newtxt, txt, len);
    1437      607534 :     else
    1438             :     {
    1439             :         /* Gotta do it the hard way */
    1440             :         int         i = 0;
    1441           0 : 
    1442             :         while (i < len)
    1443           0 :         {
    1444             :             int         thislen = PQmblen(txt + i, state->encoding);
    1445           0 : 
    1446             :             /* first byte should always be okay... */
    1447             :             newtxt[i] = txt[i];
    1448           0 :             i++;
    1449           0 :             while (--thislen > 0 && i < len)
    1450           0 :                 newtxt[i++] = (char) 0xFF;
    1451           0 :         }
    1452             :     }
    1453             : 
    1454             :     return yy_scan_buffer(newtxt, len + 2, state->scanner);
    1455      607534 : }
    1456             : 
    1457             : /*
    1458             :  * psqlscan_emit() --- body for ECHO macro
    1459             :  *
    1460             :  * NB: this must be used for ALL and ONLY the text copied from the flex
    1461             :  * input data.  If you pass it something that is not part of the yytext
    1462             :  * string, you are making a mistake.  Internally generated text can be
    1463             :  * appended directly to state->output_buf.
    1464             :  */
    1465             : void
    1466             : psqlscan_emit(PsqlScanState state, const char *txt, int len)
    1467     7236586 : {
    1468             :     PQExpBuffer output_buf = state->output_buf;
    1469     7236586 : 
    1470             :     if (state->safe_encoding)
    1471     7236586 :         appendBinaryPQExpBuffer(output_buf, txt, len);
    1472     7236586 :     else
    1473             :     {
    1474             :         /* Gotta do it the hard way */
    1475             :         const char *reference = state->refline;
    1476           0 :         int         i;
    1477             : 
    1478             :         reference += (txt - state->curline);
    1479           0 : 
    1480             :         for (i = 0; i < len; i++)
    1481           0 :         {
    1482             :             char        ch = txt[i];
    1483           0 : 
    1484             :             if (ch == (char) 0xFF)
    1485           0 :                 ch = reference[i];
    1486           0 :             appendPQExpBufferChar(output_buf, ch);
    1487           0 :         }
    1488             :     }
    1489             : }
    1490     7236586 : 
    1491             : /*
    1492             :  * psqlscan_extract_substring --- fetch value of (part of) the current token
    1493             :  *
    1494             :  * This is like psqlscan_emit(), except that the data is returned as a
    1495             :  * malloc'd string rather than being pushed directly to state->output_buf.
    1496             :  */
    1497             : char *
    1498             : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
    1499        4744 : {
    1500             :     char       *result = (char *) pg_malloc(len + 1);
    1501        4744 : 
    1502             :     if (state->safe_encoding)
    1503        4744 :         memcpy(result, txt, len);
    1504        4744 :     else
    1505             :     {
    1506             :         /* Gotta do it the hard way */
    1507             :         const char *reference = state->refline;
    1508           0 :         int         i;
    1509             : 
    1510             :         reference += (txt - state->curline);
    1511           0 : 
    1512             :         for (i = 0; i < len; i++)
    1513           0 :         {
    1514             :             char        ch = txt[i];
    1515           0 : 
    1516             :             if (ch == (char) 0xFF)
    1517           0 :                 ch = reference[i];
    1518           0 :             result[i] = ch;
    1519           0 :         }
    1520             :     }
    1521             :     result[len] = '\0';
    1522        4744 :     return result;
    1523        4744 : }
    1524             : 
    1525             : /*
    1526             :  * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
    1527             :  *
    1528             :  * If the variable name is found, escape its value using the appropriate
    1529             :  * quoting method and emit the value to output_buf.  (Since the result is
    1530             :  * surely quoted, there is never any reason to rescan it.)  If we don't
    1531             :  * find the variable or escaping fails, emit the token as-is.
    1532             :  */
    1533             : void
    1534             : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
    1535         834 :                          PsqlScanQuoteType quote)
    1536             : {
    1537             :     char       *varname;
    1538             :     char       *value;
    1539             : 
    1540             :     /* Variable lookup. */
    1541             :     varname = psqlscan_extract_substring(state, txt + 2, len - 3);
    1542         834 :     if (state->callbacks->get_variable)
    1543         834 :         value = state->callbacks->get_variable(varname, quote,
    1544         834 :                                                state->cb_passthrough);
    1545             :     else
    1546             :         value = NULL;
    1547           0 :     free(varname);
    1548         834 : 
    1549             :     if (value)
    1550         834 :     {
    1551             :         /* Emit the suitably-escaped value */
    1552             :         appendPQExpBufferStr(state->output_buf, value);
    1553         778 :         free(value);
    1554         778 :     }
    1555             :     else
    1556             :     {
    1557             :         /* Emit original token as-is */
    1558             :         psqlscan_emit(state, txt, len);
    1559          56 :     }
    1560             : }
    1561         834 : 
    1562             : void
    1563             : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
    1564          32 : {
    1565             :     char    *varname;
    1566             :     char    *value;
    1567             : 
    1568             :     varname = psqlscan_extract_substring(state, txt + 3, len - 4);
    1569          32 :     if (state->callbacks->get_variable)
    1570          32 :         value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
    1571          32 :                                                state->cb_passthrough);
    1572             :     else
    1573             :         value = NULL;
    1574           0 :     free(varname);
    1575          32 : 
    1576             :     if (value != NULL)
    1577          32 :     {
    1578             :         psqlscan_emit(state, "TRUE", 4);
    1579          14 :         free(value);
    1580          14 :     }
    1581             :     else
    1582             :     {
    1583             :         psqlscan_emit(state, "FALSE", 5);
    1584          18 :     }
    1585             : }

Generated by: LCOV version 1.14