Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * psqlscan.l
5 : * lexical scanner for SQL commands
6 : *
7 : * This lexer used to be part of psql, and that heritage is reflected in
8 : * the file name as well as function and typedef names, though it can now
9 : * be used by other frontend programs as well. It's also possible to extend
10 : * this lexer with a compatible add-on lexer to handle program-specific
11 : * backslash commands.
12 : *
13 : * This code is mainly concerned with determining where the end of a SQL
14 : * statement is: we are looking for semicolons that are not within quotes,
15 : * comments, or parentheses. The most reliable way to handle this is to
16 : * borrow the backend's flex lexer rules, lock, stock, and barrel. The rules
17 : * below are (except for a few) the same as the backend's, but their actions
18 : * are just ECHO whereas the backend's actions generally do other things.
19 : *
20 : * XXX The rules in this file must be kept in sync with the backend lexer!!!
21 : *
22 : * XXX Avoid creating backtracking cases --- see the backend lexer for info.
23 : *
24 : * See psqlscan_int.h for additional commentary.
25 : *
26 : *
27 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
28 : * Portions Copyright (c) 1994, Regents of the University of California
29 : *
30 : * IDENTIFICATION
31 : * src/fe_utils/psqlscan.l
32 : *
33 : *-------------------------------------------------------------------------
34 : */
35 : #include "postgres_fe.h"
36 :
37 : #include "common/logging.h"
38 : #include "fe_utils/psqlscan.h"
39 :
40 : #include "libpq-fe.h"
41 : }
42 :
43 : %{
44 :
45 : /* LCOV_EXCL_START */
46 :
47 : #include "fe_utils/psqlscan_int.h"
48 :
49 : /*
50 : * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
51 : * doesn't presently make use of that argument, so just declare it as int.
52 : */
53 : typedef int YYSTYPE;
54 :
55 :
56 : /* Return values from yylex() */
57 : #define LEXRES_EOL 0 /* end of input */
58 : #define LEXRES_SEMI 1 /* command-terminating semicolon found */
59 : #define LEXRES_BACKSLASH 2 /* backslash command start */
60 :
61 :
62 : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
63 :
64 : static void psqlscan_track_identifier(PsqlScanState state,
65 : const char *identifier);
66 :
67 : %}
68 :
69 : %option reentrant
70 : %option bison-bridge
71 : %option 8bit
72 : %option never-interactive
73 : %option nodefault
74 : %option noinput
75 : %option nounput
76 : %option noyywrap
77 : %option warn
78 : %option prefix="psql_yy"
79 :
80 : /*
81 : * Set the type of yyextra; we use it as a pointer back to the containing
82 : * PsqlScanState.
83 : */
84 : %option extra-type="PsqlScanState"
85 :
86 : /*
87 : * All of the following definitions and rules should exactly match
88 : * src/backend/parser/scan.l so far as the flex patterns are concerned.
89 : * The rule bodies are just ECHO as opposed to what the backend does,
90 : * however. (But be sure to duplicate code that affects the lexing process,
91 : * such as BEGIN() and yyless().) Also, psqlscan uses a single <<EOF>> rule
92 : * whereas scan.l has a separate one for each exclusive state.
93 : */
94 :
95 : /*
96 : * OK, here is a short description of lex/flex rules behavior.
97 : * The longest pattern which matches an input string is always chosen.
98 : * For equal-length patterns, the first occurring in the rules list is chosen.
99 : * INITIAL is the starting state, to which all non-conditional rules apply.
100 : * Exclusive states change parsing rules while the state is active. When in
101 : * an exclusive state, only those rules defined for that state apply.
102 : *
103 : * We use exclusive states for quoted strings, extended comments,
104 : * and to eliminate parsing troubles for numeric strings.
105 : * Exclusive states:
106 : * <xb> bit string literal
107 : * <xc> extended C-style comments
108 : * <xd> delimited identifiers (double-quoted identifiers)
109 : * <xh> hexadecimal byte string
110 : * <xq> standard quoted strings
111 : * <xqs> quote stop (detect continued strings)
112 : * <xe> extended quoted strings (support backslash escape sequences)
113 : * <xdolq> $foo$ quoted strings
114 : * <xui> quoted identifier with Unicode escapes
115 : * <xus> quoted string with Unicode escapes
116 : *
117 : * Note: we intentionally don't mimic the backend's <xeu> state; we have
118 : * no need to distinguish it from <xe> state, and no good way to get out
119 : * of it in error cases. The backend just throws yyerror() in those
120 : * cases, but that's not an option here.
121 : */
122 :
123 : %x xb
124 : %x xc
125 : %x xd
126 : %x xh
127 : %x xq
128 : %x xqs
129 : %x xe
130 : %x xdolq
131 : %x xui
132 : %x xus
133 :
134 : /*
135 : * In order to make the world safe for Windows and Mac clients as well as
136 : * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
137 : * sequence will be seen as two successive newlines, but that doesn't cause
138 : * any problems. Comments that start with -- and extend to the next
139 : * newline are treated as equivalent to a single whitespace character.
140 : *
141 : * NOTE a fine point: if there is no newline following --, we will absorb
142 : * everything to the end of the input as a comment. This is correct. Older
143 : * versions of Postgres failed to recognize -- as a comment if the input
144 : * did not end with a newline.
145 : *
146 : * non_newline_space tracks all space characters except newlines.
147 : *
148 : * XXX if you change the set of whitespace characters, fix scanner_isspace()
149 : * to agree.
150 : */
151 :
152 : space [ \t\n\r\f\v]
153 : non_newline_space [ \t\f\v]
154 : newline [\n\r]
155 : non_newline [^\n\r]
156 :
157 : comment ("--"{non_newline}*)
158 :
159 : whitespace ({space}+|{comment})
160 :
161 : /*
162 : * SQL requires at least one newline in the whitespace separating
163 : * string literals that are to be concatenated. Silly, but who are we
164 : * to argue? Note that {whitespace_with_newline} should not have * after
165 : * it, whereas {whitespace} should generally have a * after it...
166 : */
167 :
168 : special_whitespace ({space}+|{comment}{newline})
169 : non_newline_whitespace ({non_newline_space}|{comment})
170 : whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
171 :
172 : quote '
173 : /* If we see {quote} then {quotecontinue}, the quoted string continues */
174 : quotecontinue {whitespace_with_newline}{quote}
175 :
176 : /*
177 : * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
178 : * {quotecontinue}. It might seem that this could just be {whitespace}*,
179 : * but if there's a dash after {whitespace_with_newline}, it must be consumed
180 : * to see if there's another dash --- which would start a {comment} and thus
181 : * allow continuation of the {quotecontinue} token.
182 : */
183 : quotecontinuefail {whitespace}*"-"?
184 :
185 : /* Bit string
186 : * It is tempting to scan the string for only those characters
187 : * which are allowed. However, this leads to silently swallowed
188 : * characters if illegal characters are included in the string.
189 : * For example, if xbinside is [01] then B'ABCD' is interpreted
190 : * as a zero-length string, and the ABCD' is lost!
191 : * Better to pass the string forward and let the input routines
192 : * validate the contents.
193 : */
194 : xbstart [bB]{quote}
195 : xbinside [^']*
196 :
197 : /* Hexadecimal byte string */
198 : xhstart [xX]{quote}
199 : xhinside [^']*
200 :
201 : /* National character */
202 : xnstart [nN]{quote}
203 :
204 : /* Quoted string that allows backslash escapes */
205 : xestart [eE]{quote}
206 : xeinside [^\\']+
207 : xeescape [\\][^0-7]
208 : xeoctesc [\\][0-7]{1,3}
209 : xehexesc [\\]x[0-9A-Fa-f]{1,2}
210 : xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
211 : xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
212 :
213 : /* Extended quote
214 : * xqdouble implements embedded quote, ''''
215 : */
216 : xqstart {quote}
217 : xqdouble {quote}{quote}
218 : xqinside [^']+
219 :
220 : /* $foo$ style quotes ("dollar quoting")
221 : * The quoted string starts with $foo$ where "foo" is an optional string
222 : * in the form of an identifier, except that it may not contain "$",
223 : * and extends to the first occurrence of an identical string.
224 : * There is *no* processing of the quoted text.
225 : *
226 : * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
227 : * fails to match its trailing "$".
228 : */
229 : dolq_start [A-Za-z\200-\377_]
230 : dolq_cont [A-Za-z\200-\377_0-9]
231 : dolqdelim \$({dolq_start}{dolq_cont}*)?\$
232 : dolqfailed \${dolq_start}{dolq_cont}*
233 : dolqinside [^$]+
234 :
235 : /* Double quote
236 : * Allows embedded spaces and other special characters into identifiers.
237 : */
238 : dquote \"
239 : xdstart {dquote}
240 : xdstop {dquote}
241 : xddouble {dquote}{dquote}
242 : xdinside [^"]+
243 :
244 : /* Quoted identifier with Unicode escapes */
245 : xuistart [uU]&{dquote}
246 :
247 : /* Quoted string with Unicode escapes */
248 : xusstart [uU]&{quote}
249 :
250 : /* error rule to avoid backup */
251 : xufailed [uU]&
252 :
253 :
254 : /* C-style comments
255 : *
256 : * The "extended comment" syntax closely resembles allowable operator syntax.
257 : * The tricky part here is to get lex to recognize a string starting with
258 : * slash-star as a comment, when interpreting it as an operator would produce
259 : * a longer match --- remember lex will prefer a longer match! Also, if we
260 : * have something like plus-slash-star, lex will think this is a 3-character
261 : * operator whereas we want to see it as a + operator and a comment start.
262 : * The solution is two-fold:
263 : * 1. append {op_chars}* to xcstart so that it matches as much text as
264 : * {operator} would. Then the tie-breaker (first matching rule of same
265 : * length) ensures xcstart wins. We put back the extra stuff with yyless()
266 : * in case it contains a star-slash that should terminate the comment.
267 : * 2. In the operator rule, check for slash-star within the operator, and
268 : * if found throw it back with yyless(). This handles the plus-slash-star
269 : * problem.
270 : * Dash-dash comments have similar interactions with the operator rule.
271 : */
272 : xcstart \/\*{op_chars}*
273 : xcstop \*+\/
274 : xcinside [^*/]+
275 :
276 : ident_start [A-Za-z\200-\377_]
277 : ident_cont [A-Za-z\200-\377_0-9\$]
278 :
279 : identifier {ident_start}{ident_cont}*
280 :
281 : /* Assorted special-case operators and operator-like tokens */
282 : typecast "::"
283 : dot_dot \.\.
284 : colon_equals ":="
285 :
286 : /*
287 : * These operator-like tokens (unlike the above ones) also match the {operator}
288 : * rule, which means that they might be overridden by a longer match if they
289 : * are followed by a comment start or a + or - character. Accordingly, if you
290 : * add to this list, you must also add corresponding code to the {operator}
291 : * block to return the correct token in such cases. (This is not needed in
292 : * psqlscan.l since the token value is ignored there.)
293 : */
294 : equals_greater "=>"
295 : less_equals "<="
296 : greater_equals ">="
297 : less_greater "<>"
298 : not_equals "!="
299 : /* Note there is no need for left_arrow, since "<-" is not a single operator. */
300 : right_arrow "->"
301 :
302 : /*
303 : * "self" is the set of chars that should be returned as single-character
304 : * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
305 : * which can be one or more characters long (but if a single-char token
306 : * appears in the "self" set, it is not to be returned as an Op). Note
307 : * that the sets overlap, but each has some chars that are not in the other.
308 : *
309 : * If you change either set, adjust the character lists appearing in the
310 : * rule for "operator"!
311 : */
312 : self [,()\[\].;\:\|\+\-\*\/\%\^\<\>\=]
313 : op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
314 : operator {op_chars}+
315 :
316 : /*
317 : * Numbers
318 : *
319 : * Unary minus is not part of a number here. Instead we pass it separately to
320 : * the parser, and there it gets coerced via doNegate().
321 : *
322 : * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
323 : *
324 : * {realfail} is added to prevent the need for scanner
325 : * backup when the {real} rule fails to match completely.
326 : */
327 : decdigit [0-9]
328 : hexdigit [0-9A-Fa-f]
329 : octdigit [0-7]
330 : bindigit [0-1]
331 :
332 : decinteger {decdigit}(_?{decdigit})*
333 : hexinteger 0[xX](_?{hexdigit})+
334 : octinteger 0[oO](_?{octdigit})+
335 : bininteger 0[bB](_?{bindigit})+
336 :
337 : hexfail 0[xX]_?
338 : octfail 0[oO]_?
339 : binfail 0[bB]_?
340 :
341 : numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
342 : numericfail {decinteger}\.\.
343 :
344 : real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
345 : realfail ({decinteger}|{numeric})[Ee][-+]
346 :
347 : /* Positional parameters don't accept underscores. */
348 : param \${decdigit}+
349 :
350 : /*
351 : * An identifier immediately following an integer literal is disallowed because
352 : * in some cases it's ambiguous what is meant: for example, 0x1234 could be
353 : * either a hexinteger or a decinteger "0" and an identifier "x1234". We can
354 : * detect such problems by seeing if integer_junk matches a longer substring
355 : * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
356 : * bininteger). One "junk" pattern is sufficient because
357 : * {decinteger}{identifier} will match all the same strings we'd match with
358 : * {hexinteger}{identifier} etc.
359 : *
360 : * Note that the rule for integer_junk must appear after the ones for
361 : * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
362 : * and integer_junk, and we need hexinteger to be chosen in that case.
363 : *
364 : * Also disallow strings matched by numeric_junk, real_junk and param_junk
365 : * for consistency.
366 : */
367 : integer_junk {decinteger}{identifier}
368 : numeric_junk {numeric}{identifier}
369 : real_junk {real}{identifier}
370 : param_junk \${decdigit}+{identifier}
371 :
372 : /* psql-specific: characters allowed in variable names */
373 : variable_char [A-Za-z\200-\377_0-9]
374 :
375 : other .
376 :
377 : /*
378 : * Dollar quoted strings are totally opaque, and no escaping is done on them.
379 : * Other quoted strings must allow some special characters such as single-quote
380 : * and newline.
381 : * Embedded single-quotes are implemented both in the SQL standard
382 : * style of two adjacent single quotes "''" and in the Postgres/Java style
383 : * of escaped-quote "\'".
384 : * Other embedded escaped characters are matched explicitly and the leading
385 : * backslash is dropped from the string.
386 : * Note that xcstart must appear before operator, as explained above!
387 : * Also whitespace (comment) must appear before operator.
388 : */
389 :
390 : %%
391 :
392 : %{
393 : /* Declare some local variables inside yylex(), for convenience */
394 : PsqlScanState cur_state = yyextra;
395 799255 : PQExpBuffer output_buf = cur_state->output_buf;
396 799255 :
397 : /*
398 : * Force flex into the state indicated by start_state. This has a
399 : * couple of purposes: it lets some of the functions below set a new
400 : * starting state without ugly direct access to flex variables, and it
401 : * allows us to transition from one flex lexer to another so that we
402 : * can lex different parts of the source string using separate lexers.
403 : */
404 : BEGIN(cur_state->start_state);
405 799255 : %}
406 :
407 : {whitespace} {
408 : /*
409 : * Note that the whitespace rule includes both true
410 : * whitespace and single-line ("--" style) comments.
411 : * We suppress whitespace until we have collected some
412 : * non-whitespace data. (This interacts with some
413 : * decisions in MainLoop(); see there for details.)
414 : */
415 : if (output_buf->len > 0)
416 1900577 : ECHO;
417 1787637 : }
418 :
419 1900577 : {xcstart} {
420 448 : cur_state->xcdepth = 0;
421 448 : BEGIN(xc);
422 448 : /* Put back any characters past slash-star; see above */
423 : yyless(2);
424 448 : ECHO;
425 448 : }
426 :
<xc>{
	/*
	 * Rules that apply while inside a (possibly nested) C-style comment.
	 * cur_state->xcdepth counts how many nested comment openers are
	 * currently unclosed; it is zero while only the outermost comment is
	 * open.
	 */
	{xcstart}		{
					/*
					 * Nested comment opener.  Only the slash-star itself is
					 * consumed; anything the pattern matched beyond it is
					 * put back for rescanning.
					 */
					yyless(2);
					cur_state->xcdepth += 1;
					ECHO;
				}

	{xcstop}		{
					/*
					 * Comment closer: either pop one nesting level, or, at
					 * the outermost level, go back to normal scanning.
					 */
					if (cur_state->xcdepth > 0)
						cur_state->xcdepth--;
					else
						BEGIN(INITIAL);
					ECHO;
				}

	/* Everything else inside a comment is passed through unchanged */
	{xcinside}		|
	{op_chars}		|
	\*+				{
					ECHO;
				}
} /* <xc> */
455 0 :
456 : {xbstart} {
457 500 : BEGIN(xb);
458 500 : ECHO;
459 500 : }
460 : <xh>{xhinside} |
461 500 : <xb>{xbinside} {
462 2695 : ECHO;
463 2695 : }
464 :
465 2695 : {xhstart} {
466 2215 : /* Hexadecimal bit type.
467 : * At some point we should simply pass the string
468 : * forward to the parser and label it there.
469 : * In the meantime, place a leading "x" on the string
470 : * to mark it for the input routine as a hex string.
471 : */
472 : BEGIN(xh);
473 2215 : ECHO;
474 2215 : }
475 :
476 2215 : {xnstart} {
477 0 : yyless(1); /* eat only 'n' this time */
478 0 : ECHO;
479 0 : }
480 :
481 0 : {xqstart} {
482 158798 : if (cur_state->std_strings)
483 158798 : BEGIN(xq);
484 158798 : else
485 : BEGIN(xe);
486 0 : ECHO;
487 158798 : }
488 : {xestart} {
489 158798 : BEGIN(xe);
490 877 : ECHO;
491 877 : }
492 : {xusstart} {
493 877 : BEGIN(xus);
494 464 : ECHO;
495 464 : }
496 :
497 464 : <xb,xh,xq,xe,xus>{quote} {
498 162854 : /*
499 : * When we are scanning a quoted string and see an end
500 : * quote, we must look ahead for a possible continuation.
501 : * If we don't see one, we know the end quote was in fact
502 : * the end of the string. To reduce the lexer table size,
503 : * we use a single "xqs" state to do the lookahead for all
504 : * types of strings.
505 : */
506 : cur_state->state_before_str_stop = YYSTATE;
507 162854 : BEGIN(xqs);
508 162854 : ECHO;
509 162854 : }
510 : <xqs>{quotecontinue} {
511 162854 : /*
512 0 : * Found a quote continuation, so return to the in-quote
513 : * state and continue scanning the literal. Nothing is
514 : * added to the literal's contents.
515 : */
516 : BEGIN(cur_state->state_before_str_stop);
517 0 : ECHO;
518 0 : }
519 : <xqs>{quotecontinuefail} |
520 0 : <xqs>{other} {
521 162027 : /*
522 : * Failed to see a quote continuation. Throw back
523 : * everything after the end quote, and handle the string
524 : * according to the state we were in previously.
525 : */
526 : yyless(0);
527 162027 : BEGIN(INITIAL);
528 162027 : /* There's nothing to echo ... */
529 : }
530 :
531 162027 : <xq,xe,xus>{xqdouble} {
532 4127 : ECHO;
533 4127 : }
534 : <xq,xus>{xqinside} {
535 4127 : ECHO;
536 166581 : }
537 : <xe>{xeinside} {
538 166581 : ECHO;
539 1663 : }
540 : <xe>{xeunicode} {
541 1663 : ECHO;
542 132 : }
543 : <xe>{xeunicodefail} {
544 132 : ECHO;
545 8 : }
546 : <xe>{xeescape} {
547 8 : ECHO;
548 962 : }
549 : <xe>{xeoctesc} {
550 962 : ECHO;
551 14 : }
552 : <xe>{xehexesc} {
553 14 : ECHO;
554 6 : }
555 : <xe>. {
556 6 : /* This is only needed for \ just before EOF */
557 0 : ECHO;
558 0 : }
559 :
560 0 : {dolqdelim} {
561 4584 : cur_state->dolqstart = pg_strdup(yytext);
562 4584 : BEGIN(xdolq);
563 4584 : ECHO;
564 4584 : }
565 : {dolqfailed} {
566 4584 : /* throw back all but the initial "$" */
567 0 : yyless(1);
568 0 : ECHO;
569 0 : }
<xdolq>{dolqdelim} {
					/*
					 * Saw something shaped like a dollar-quote delimiter while
					 * inside a dollar-quoted string.  It ends the string only
					 * if it is identical to the delimiter that opened it.
					 */
					if (strcmp(yytext, cur_state->dolqstart) != 0)
					{
						/*
						 * Not our terminator.  Emit the "$..." part but give
						 * back the trailing "$" so it can start a new match;
						 * consider $delim$...$junk$delim$
						 */
						yyless(yyleng - 1);
					}
					else
					{
						/* Matching terminator: forget the tag, leave the string */
						free(cur_state->dolqstart);
						cur_state->dolqstart = NULL;
						BEGIN(INITIAL);
					}
					ECHO;
				}
588 : <xdolq>{dolqinside} {
589 4800 : ECHO;
590 24257 : }
591 : <xdolq>{dolqfailed} {
592 24257 : ECHO;
593 574 : }
594 : <xdolq>. {
595 574 : /* This is only needed for $ inside the quoted text */
596 1629 : ECHO;
597 1629 : }
598 :
599 1629 : {xdstart} {
600 6633 : BEGIN(xd);
601 6633 : ECHO;
602 6633 : }
603 : {xuistart} {
604 6633 : BEGIN(xui);
605 16 : ECHO;
606 16 : }
607 : <xd>{xdstop} {
608 16 : BEGIN(INITIAL);
609 6633 : ECHO;
610 6633 : }
611 : <xui>{dquote} {
612 6633 : BEGIN(INITIAL);
613 16 : ECHO;
614 16 : }
615 : <xd,xui>{xddouble} {
616 16 : ECHO;
617 67 : }
618 : <xd,xui>{xdinside} {
619 67 : ECHO;
620 6710 : }
621 :
622 6710 : {xufailed} {
623 0 : /* throw back all but the initial u/U */
624 : yyless(1);
625 0 : ECHO;
626 0 : }
627 :
628 0 : {typecast} {
629 37151 : ECHO;
630 37151 : }
631 :
632 37151 : {dot_dot} {
633 0 : ECHO;
634 0 : }
635 :
636 0 : {colon_equals} {
637 1673 : ECHO;
638 1673 : }
639 :
640 1673 : {equals_greater} {
641 1345 : ECHO;
642 1345 : }
643 :
644 1345 : {less_equals} {
645 1413 : ECHO;
646 1413 : }
647 :
648 1413 : {greater_equals} {
649 4195 : ECHO;
650 4195 : }
651 :
652 4195 : {less_greater} {
653 918 : ECHO;
654 918 : }
655 :
656 918 : {not_equals} {
657 1525 : ECHO;
658 1525 : }
659 :
660 1525 : {right_arrow} {
661 781 : ECHO;
662 781 : }
663 :
664 781 : /*
665 : * These rules are specific to psql --- they implement parenthesis
666 : * counting and detection of command-ending semicolon. These must
667 : * appear before the {self} rule so that they take precedence over it.
668 : */
669 :
670 257294 : "(" {
671 : cur_state->paren_depth++;
672 257294 : ECHO;
673 257294 : }
674 :
675 257294 : ")" {
676 257285 : if (cur_state->paren_depth > 0)
677 257285 : cur_state->paren_depth--;
678 257285 : ECHO;
679 257285 : }
680 :
";"				{
					/*
					 * A semicolon ends the command only when it is outside
					 * all parentheses and outside any tracked BEGIN ... END
					 * block; otherwise it is ordinary text.
					 */
					bool		at_top_level;

					ECHO;
					at_top_level = (cur_state->paren_depth == 0 &&
									cur_state->begin_depth == 0);
					if (at_top_level)
					{
						/*
						 * Terminate lexing temporarily.  Zeroing
						 * init_idents_count makes identifier tracking start
						 * afresh with the next statement.
						 */
						cur_state->start_state = YY_START;
						cur_state->init_idents_count = 0;
						return LEXRES_SEMI;
					}
				}
692 :
693 163 : /*
694 : * psql-specific rules to handle backslash commands and variable
695 : * substitution. We want these before {self}, also.
696 : */
697 :
"\\"[;:]		{
					/*
					 * Backslash-semicolon or backslash-colon: put just the
					 * second character into the query buffer, without ending
					 * the command.
					 */
					psqlscan_emit(cur_state, yytext + 1, 1);

					/*
					 * A semicolon at the outermost level still starts a new
					 * statement as far as BEGIN/END tracking is concerned.
					 */
					if (yytext[1] == ';')
					{
						if (cur_state->paren_depth == 0 &&
							cur_state->begin_depth == 0)
							cur_state->init_idents_count = 0;
					}
				}
707 :
708 512 : "\\" {
709 32810 : /* Terminate lexing temporarily */
710 : cur_state->start_state = YY_START;
711 32810 : return LEXRES_BACKSLASH;
712 32810 : }
713 :
:{variable_char}+	{
					/*
					 * Looks like a psql variable reference.  If the callback
					 * knows the variable, its value is pushed as a new input
					 * buffer to be lexed; otherwise the text is copied as is.
					 */
					char	   *name;
					char	   *replacement = NULL;

					/* The variable name is everything after the colon */
					name = psqlscan_extract_substring(cur_state,
													  yytext + 1,
													  yyleng - 1);

					if (cur_state->callbacks->get_variable)
						replacement = cur_state->callbacks->get_variable(name,
																		 PQUOTE_PLAIN,
																		 cur_state->cb_passthrough);

					if (replacement == NULL)
					{
						/* No such variable: copy the string as is */
						ECHO;
					}
					else if (psqlscan_var_is_current_source(cur_state, name))
					{
						/* Recursive expansion --- don't go there */
						pg_log_warning("skipping recursive expansion of variable \"%s\"",
									   name);
						/* Instead copy the string as is */
						ECHO;
					}
					else
					{
						/* OK, perform substitution */
						psqlscan_push_new_buffer(cur_state, replacement, name);
						/* yy_scan_string already made buffer active */
					}

					/* free(NULL) is a no-op, so this covers every branch */
					free(replacement);
					free(name);
				}
759 :
760 1787 : :'{variable_char}+' {
761 656 : psqlscan_escape_variable(cur_state, yytext, yyleng,
762 656 : PQUOTE_SQL_LITERAL);
763 : }
764 :
765 656 : :\"{variable_char}+\" {
766 21 : psqlscan_escape_variable(cur_state, yytext, yyleng,
767 21 : PQUOTE_SQL_IDENT);
768 : }
769 :
770 21 : :\{\?{variable_char}+\} {
771 8 : psqlscan_test_variable(cur_state, yytext, yyleng);
772 8 : }
773 :
774 8 : /*
775 : * These rules just avoid the need for scanner backup if one of the
776 : * three rules above fails to match completely.
777 : */
778 :
779 0 : :'{variable_char}* {
780 : /* Throw back everything but the colon */
781 : yyless(1);
782 0 : ECHO;
783 0 : }
784 :
785 0 : :\"{variable_char}* {
786 0 : /* Throw back everything but the colon */
787 : yyless(1);
788 0 : ECHO;
789 0 : }
790 :
791 0 : :\{\?{variable_char}* {
792 0 : /* Throw back everything but the colon */
793 : yyless(1);
794 0 : ECHO;
795 0 : }
796 : :\{ {
797 0 : /* Throw back everything but the colon */
798 0 : yyless(1);
799 0 : ECHO;
800 0 : }
801 :
802 0 : /*
803 : * Back to backend-compatible rules.
804 : */
805 :
806 447764 : {self} {
807 : ECHO;
808 447764 : }
809 :
{operator}		{
					/*
					 * The operator pattern may have swallowed too much.
					 * Work out how many leading characters really form the
					 * operator, give the rest back, and echo what remains.
					 */
					int			keep = yyleng;
					char	   *cut = strstr(yytext, "/*");
					char	   *dashes = strstr(yytext, "--");

					/*
					 * An embedded slash-star or dash-dash starts a comment,
					 * so the operator must stop there; if both appear, the
					 * earlier one wins.  (Either one at the very first
					 * character would have matched a prior rule instead.)
					 */
					if (dashes != NULL && (cut == NULL || dashes < cut))
						cut = dashes;
					if (cut != NULL)
						keep = cut - yytext;

					/*
					 * For SQL compatibility, '+' and '-' cannot be the last
					 * char of a multi-char operator unless the operator
					 * contains chars that are not in SQL operators.  So
					 * '=-' lexes as two operators, while a name like '?-'
					 * is left alone.
					 */
					if (keep > 1 &&
						(yytext[keep - 1] == '+' ||
						 yytext[keep - 1] == '-'))
					{
						int			pos = keep - 2;

						/* search backwards for a non-SQL operator character */
						while (pos >= 0 &&
							   strchr("~!@#^&|`?%", yytext[pos]) == NULL)
							pos--;

						if (pos < 0)
						{
							/*
							 * None found, so strip all trailing [+-], but
							 * always leave at least one character.
							 */
							do
							{
								keep--;
							} while (keep > 1 &&
									 (yytext[keep - 1] == '+' ||
									  yytext[keep - 1] == '-'));
						}
					}

					/* Strip the unwanted chars from the token, if any */
					if (keep < yyleng)
						yyless(keep);
					ECHO;
				}
876 :
877 12754 : {param} {
878 924 : ECHO;
879 924 : }
880 : {param_junk} {
881 924 : ECHO;
882 8 : }
883 :
884 8 : {decinteger} {
885 142181 : ECHO;
886 142181 : }
887 : {hexinteger} {
888 142181 : ECHO;
889 83 : }
890 : {octinteger} {
891 83 : ECHO;
892 40 : }
893 : {bininteger} {
894 40 : ECHO;
895 40 : }
896 : {hexfail} {
897 40 : ECHO;
898 4 : }
899 : {octfail} {
900 4 : ECHO;
901 4 : }
902 : {binfail} {
903 4 : ECHO;
904 4 : }
905 : {numeric} {
906 4 : ECHO;
907 5335 : }
908 : {numericfail} {
909 5335 : /* throw back the .., and treat as integer */
910 0 : yyless(yyleng - 2);
911 0 : ECHO;
912 0 : }
913 : {real} {
914 0 : ECHO;
915 506 : }
916 : {realfail} {
917 506 : ECHO;
918 4 : }
919 : {integer_junk} {
920 4 : ECHO;
921 44 : }
922 : {numeric_junk} {
923 44 : ECHO;
924 32 : }
925 : {real_junk} {
926 32 : ECHO;
927 0 : }
928 :
929 0 :
930 1848207 : {identifier} {
931 : psqlscan_track_identifier(cur_state, yytext);
932 1848207 : ECHO;
933 1848207 : }
934 :
935 1848207 : {other} {
936 8 : ECHO;
937 8 : }
938 :
<<EOF>>			{
					if (cur_state->buffer_stack != NULL)
					{
						/*
						 * The buffer that just ran out was a variable
						 * expansion, so pop the inclusion stack and keep
						 * lexing whatever is now on top.
						 */
						psqlscan_pop_buffer_stack(cur_state);
						psqlscan_select_top_buffer(cur_state);
					}
					else
					{
						/* Nothing stacked: end of input reached */
						cur_state->start_state = YY_START;
						return LEXRES_EOL;
					}
				}
953 :
954 883 : %%
955 0 :
956 : /* LCOV_EXCL_STOP */
957 :
/*
 * Note one more leading keyword/identifier of a statement, or of a CREATE
 * SCHEMA sub-statement.
 *
 * idents[] has room for idents_size entries, of which *idents_count are
 * already in use.  Once the array is full, the call does nothing.
 *
 * The keywords of interest are stored as their lower-cased first letter,
 * which is unambiguous so long as they all start with different letters.
 * (An enum would be the fallback if that ever stops being true.)  For any
 * other word the array entry is not written, but the count still advances.
 */
static void
psqlscan_record_initial_keyword(const char *identifier,
								char *idents,
								int idents_size,
								int *idents_count)
{
	/*
	 * What we need to recognize is CREATE [OR REPLACE] FUNCTION/PROCEDURE
	 * and CREATE SCHEMA.  Checking for SCHEMA is useless but not harmful in
	 * the CREATE SCHEMA sub-statement case.
	 */
	static const char *const interesting[] = {
		"create", "function", "procedure", "or", "replace", "schema"
	};
	int			slot = *idents_count;
	size_t		i;

	/* No room left: ignore the word entirely */
	if (slot >= idents_size)
		return;

	for (i = 0; i < sizeof(interesting) / sizeof(interesting[0]); i++)
	{
		if (pg_strcasecmp(identifier, interesting[i]) == 0)
		{
			idents[slot] = pg_tolower((unsigned char) identifier[0]);
			break;
		}
	}

	/* The slot is used up whether or not we stored a letter in it */
	*idents_count = slot + 1;
}
991 1397150 :
/*
 * Do the recorded leading keywords spell out
 * CREATE [OR REPLACE] {FUNCTION|PROCEDURE}?
 *
 * idents[] holds one letter per leading word, as stored by
 * psqlscan_record_initial_keyword().  At most the first four entries are
 * examined, and a later entry is looked at only if the earlier ones matched.
 */
static bool
psqlscan_is_create_routine(const char *idents)
{
	/* Must start with CREATE */
	if (idents[0] != 'c')
		return false;

	/* CREATE FUNCTION / CREATE PROCEDURE */
	if (idents[1] == 'f' || idents[1] == 'p')
		return true;

	/* Otherwise only CREATE OR REPLACE ... can still qualify */
	if (idents[1] != 'o' || idents[2] != 'r')
		return false;

	return idents[3] == 'f' || idents[3] == 'p';
}
1003 :
1004 : /*
1005 : * Track whether we are inside a BEGIN .. END block in a function definition,
1006 : * so that semicolons contained therein don't terminate the whole statement.
1007 : * Short of writing a full parser here, the following heuristic should work.
1008 : *
1009 : * We track whether the beginning of the statement matches CREATE [OR REPLACE]
1010 : * {FUNCTION|PROCEDURE}. For CREATE SCHEMA, track BEGIN .. END blocks only
1011 : * after recognizing an embedded CREATE [OR REPLACE] {FUNCTION|PROCEDURE}
1012 : * subcommand. Once one of these conditions holds, count BEGIN and END
1013 : * pairs. We also have to account for CASE ... END.
1014 : */
1015 : static void
1016 : psqlscan_track_identifier(PsqlScanState state, const char *identifier)
1017 1848207 : {
1018 : bool is_create_schema;
1019 :
1020 : /* None of this needs to happen when we're inside parentheses */
1021 : if (state->paren_depth != 0)
1022 1848207 : return;
1023 455704 :
1024 : /* Reset all my state at the start of each new statement */
1025 : if (state->init_idents_count == 0)
1026 1392503 : {
1027 : memset(state->init_idents, 0, sizeof(state->init_idents));
1028 252066 : state->sub_idents_count = 0;
1029 252066 : memset(state->sub_idents, 0, sizeof(state->sub_idents));
1030 252066 : }
1031 :
1032 : /* Record initial keywords if init_idents_count is small enough */
1033 : psqlscan_record_initial_keyword(identifier,
1034 1392503 : state->init_idents,
1035 1392503 : lengthof(state->init_idents),
1036 : &state->init_idents_count);
1037 :
1038 : /*
1039 : * In CREATE SCHEMA, track identifiers from each top-level CREATE schema
1040 : * element separately, so that BEGIN/END tracking is enabled only within
1041 : * CREATE [OR REPLACE] {FUNCTION|PROCEDURE} clauses.
1042 : */
1043 : is_create_schema = (state->init_idents[0] == 'c' &&
1044 1690684 : state->init_idents[1] == 's');
1045 298181 : if (is_create_schema &&
1046 1392503 : state->begin_depth == 0)
1047 4667 : {
1048 : /* Reset sub-clause state at each top-level CREATE keyword */
1049 : if (pg_strcasecmp(identifier, "create") == 0)
1050 4647 : {
1051 : state->sub_idents_count = 0;
1052 500 : memset(state->sub_idents, 0, sizeof(state->sub_idents));
1053 500 : }
1054 : /* ... and record the first few keywords following that */
1055 : psqlscan_record_initial_keyword(identifier,
1056 4647 : state->sub_idents,
1057 4647 : lengthof(state->sub_idents),
1058 : &state->sub_idents_count);
1059 : }
1060 :
1061 : /*
1062 : * Track BEGIN/CASE/END only when within an appropriate (sub) statement.
1063 : */
1064 : if (psqlscan_is_create_routine(state->init_idents) ||
1065 1392503 : (is_create_schema &&
1066 4667 : psqlscan_is_create_routine(state->sub_idents)))
1067 4667 : {
1068 : if (pg_strcasecmp(identifier, "begin") == 0)
1069 38444 : state->begin_depth++;
1070 115 : else if (pg_strcasecmp(identifier, "case") == 0)
1071 38329 : {
1072 : /*
1073 : * CASE also ends with END. We only need to track this if we are
1074 : * already inside a BEGIN.
1075 : */
1076 : if (state->begin_depth >= 1)
1077 4 : state->begin_depth++;
1078 4 : }
1079 : else if (pg_strcasecmp(identifier, "end") == 0)
1080 38325 : {
1081 : if (state->begin_depth > 0)
1082 119 : state->begin_depth--;
1083 119 : }
1084 : }
1085 : }
1086 :
1087 : /*
1088 : * Create a lexer working state struct.
1089 : *
1090 : * callbacks is a struct of function pointers that encapsulate some
1091 : * behavior we need from the surrounding program. This struct must
1092 : * remain valid for the lifespan of the PsqlScanState.
1093 : */
1094 : PsqlScanState
1095 : psql_scan_create(const PsqlScanCallbacks *callbacks)
1096 10660 : {
1097 : PsqlScanState state;
1098 :
1099 : state = pg_malloc0_object(PsqlScanStateData);
1100 10660 :
1101 : state->callbacks = callbacks;
1102 10660 :
1103 : yylex_init(&state->scanner);
1104 10660 :
1105 : yyset_extra(state, state->scanner);
1106 10660 :
1107 : psql_scan_reset(state);
1108 10660 :
1109 : return state;
1110 10660 : }
1111 :
1112 : /*
1113 : * Destroy a lexer working state struct, releasing all resources.
1114 : */
1115 : void
1116 : psql_scan_destroy(PsqlScanState state)
1117 10604 : {
1118 : psql_scan_finish(state);
1119 10604 :
1120 : psql_scan_reset(state);
1121 10604 :
1122 : yylex_destroy(state->scanner);
1123 10604 :
1124 : free(state);
1125 10604 : }
1126 10604 :
1127 : /*
1128 : * Set the callback passthrough pointer for the lexer.
1129 : *
1130 : * This could have been integrated into psql_scan_create, but keeping it
1131 : * separate allows the application to change the pointer later, which might
1132 : * be useful.
1133 : */
1134 : void
1135 : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
1136 10061 : {
1137 : state->cb_passthrough = passthrough;
1138 10061 : }
1139 10061 :
1140 : /*
1141 : * Set up to perform lexing of the given input line.
1142 : *
1143 : * The text at *line, extending for line_len bytes, will be scanned by
1144 : * subsequent calls to the psql_scan routines. psql_scan_finish should
1145 : * be called when scanning is complete. Note that the lexer retains
1146 : * a pointer to the storage at *line --- this string must not be altered
1147 : * or freed until after psql_scan_finish is called.
1148 : *
1149 : * encoding is the libpq identifier for the character encoding in use,
1150 : * and std_strings says whether standard_conforming_strings is on.
1151 : */
1152 : void
1153 : psql_scan_setup(PsqlScanState state,
1154 521665 : const char *line, int line_len,
1155 : int encoding, bool std_strings)
1156 : {
1157 : /* Mustn't be scanning already */
1158 : Assert(state->scanbufhandle == NULL);
1159 : Assert(state->buffer_stack == NULL);
1160 :
1161 : /* Do we need to hack the character set encoding? */
1162 : state->encoding = encoding;
1163 521665 : state->safe_encoding = pg_valid_server_encoding_id(encoding);
1164 521665 :
1165 : /* Save standard-strings flag as well */
1166 : state->std_strings = std_strings;
1167 521665 :
1168 : /* Set up flex input buffer with appropriate translation and padding */
1169 : state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
1170 521665 : &state->scanbuf);
1171 : state->scanline = line;
1172 521665 :
1173 : /* Set lookaside data in case we have to map unsafe encoding */
1174 : state->curline = state->scanbuf;
1175 521665 : state->refline = state->scanline;
1176 521665 :
1177 : /* Initialize state for psql_scan_get_location() */
1178 : state->cur_line_no = 0; /* yylex not called yet */
1179 521665 : state->cur_line_ptr = state->scanbuf;
1180 521665 : }
1181 521665 :
1182 : /*
1183 : * Do lexical analysis of SQL command text.
1184 : *
1185 : * The text previously passed to psql_scan_setup is scanned, and appended
1186 : * (possibly with transformation) to query_buf.
1187 : *
1188 : * The return value indicates the condition that stopped scanning:
1189 : *
1190 : * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
1191 : * transferred to query_buf.) The command accumulated in query_buf should
1192 : * be executed, then clear query_buf and call again to scan the remainder
1193 : * of the line.
1194 : *
1195 : * PSCAN_BACKSLASH: found a backslash that starts a special command.
1196 : * Any previous data on the line has been transferred to query_buf.
1197 : * The caller will typically next apply a separate flex lexer to scan
1198 : * the special command.
1199 : *
1200 : * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1201 : * incomplete SQL command. *prompt is set to the appropriate prompt type.
1202 : *
1203 : * PSCAN_EOL: the end of the line was reached, and there is no lexical
1204 : * reason to consider the command incomplete. The caller may or may not
1205 : * choose to send it. *prompt is set to the appropriate prompt type if
1206 : * the caller chooses to collect more input.
1207 : *
1208 : * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1209 : * be called next, then the cycle may be repeated with a fresh input line.
1210 : *
1211 : * In all cases, *prompt is set to an appropriate prompt type code for the
1212 : * next line-input operation.
1213 : */
1214 : PsqlScanResult
1215 : psql_scan(PsqlScanState state,
1216 799255 : PQExpBuffer query_buf,
1217 : promptStatus_t *prompt)
1218 : {
1219 : PsqlScanResult result;
1220 : int lexresult;
1221 :
1222 : /* Must be scanning already */
1223 : Assert(state->scanbufhandle != NULL);
1224 :
1225 : /* Set current output target */
1226 : state->output_buf = query_buf;
1227 799255 :
1228 : /* Set input source */
1229 : if (state->buffer_stack != NULL)
1230 799255 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
1231 60 : else
1232 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1233 799195 :
1234 : /* And lex. */
1235 : lexresult = yylex(NULL, state->scanner);
1236 799255 :
1237 : /* Notify psql_scan_get_location() that a yylex call has been made. */
1238 : if (state->cur_line_no == 0)
1239 799255 : state->cur_line_no = 1;
1240 521663 :
1241 : /*
1242 : * Check termination state and return appropriate result info.
1243 : */
1244 : switch (lexresult)
1245 799255 : {
1246 : case LEXRES_EOL: /* end of input */
1247 521316 : switch (state->start_state)
1248 521316 : {
1249 : case INITIAL:
1250 489576 : case xqs: /* we treat this like INITIAL */
1251 : if (state->paren_depth > 0)
1252 489576 : {
1253 : result = PSCAN_INCOMPLETE;
1254 43108 : *prompt = PROMPT_PAREN;
1255 43108 : }
1256 : else if (state->begin_depth > 0)
1257 446468 : {
1258 : result = PSCAN_INCOMPLETE;
1259 673 : *prompt = PROMPT_CONTINUE;
1260 673 : }
1261 : else if (query_buf->len > 0)
1262 445795 : {
1263 : result = PSCAN_EOL;
1264 94522 : *prompt = PROMPT_CONTINUE;
1265 94522 : }
1266 : else
1267 : {
1268 : /* never bother to send an empty buffer */
1269 : result = PSCAN_INCOMPLETE;
1270 351273 : *prompt = PROMPT_READY;
1271 351273 : }
1272 : break;
1273 489576 : case xb:
1274 0 : result = PSCAN_INCOMPLETE;
1275 0 : *prompt = PROMPT_SINGLEQUOTE;
1276 0 : break;
1277 0 : case xc:
1278 513 : result = PSCAN_INCOMPLETE;
1279 513 : *prompt = PROMPT_COMMENT;
1280 513 : break;
1281 513 : case xd:
1282 23 : result = PSCAN_INCOMPLETE;
1283 23 : *prompt = PROMPT_DOUBLEQUOTE;
1284 23 : break;
1285 23 : case xh:
1286 0 : result = PSCAN_INCOMPLETE;
1287 0 : *prompt = PROMPT_SINGLEQUOTE;
1288 0 : break;
1289 0 : case xe:
1290 301 : result = PSCAN_INCOMPLETE;
1291 301 : *prompt = PROMPT_SINGLEQUOTE;
1292 301 : break;
1293 301 : case xq:
1294 7046 : result = PSCAN_INCOMPLETE;
1295 7046 : *prompt = PROMPT_SINGLEQUOTE;
1296 7046 : break;
1297 7046 : case xdolq:
1298 23857 : result = PSCAN_INCOMPLETE;
1299 23857 : *prompt = PROMPT_DOLLARQUOTE;
1300 23857 : break;
1301 23857 : case xui:
1302 0 : result = PSCAN_INCOMPLETE;
1303 0 : *prompt = PROMPT_DOUBLEQUOTE;
1304 0 : break;
1305 0 : case xus:
1306 0 : result = PSCAN_INCOMPLETE;
1307 0 : *prompt = PROMPT_SINGLEQUOTE;
1308 0 : break;
1309 0 : default:
1310 0 : /* can't get here */
1311 : fprintf(stderr, "invalid YY_START\n");
1312 0 : exit(1);
1313 0 : }
1314 : break;
1315 521316 : case LEXRES_SEMI: /* semicolon */
1316 245129 : result = PSCAN_SEMICOLON;
1317 245129 : *prompt = PROMPT_READY;
1318 245129 : break;
1319 245129 : case LEXRES_BACKSLASH: /* backslash */
1320 32810 : result = PSCAN_BACKSLASH;
1321 32810 : *prompt = PROMPT_READY;
1322 32810 : break;
1323 32810 : default:
1324 0 : /* can't get here */
1325 : fprintf(stderr, "invalid yylex result\n");
1326 0 : exit(1);
1327 0 : }
1328 :
1329 : return result;
1330 799255 : }
1331 :
1332 : /*
1333 : * Clean up after scanning a string. This flushes any unread input and
1334 : * releases resources (but not the PsqlScanState itself). Note however
1335 : * that this does not reset the lexer scan state; that can be done by
1336 : * psql_scan_reset(), which is an orthogonal operation.
1337 : *
1338 : * It is legal to call this when not scanning anything (makes it easier
1339 : * to deal with error recovery).
1340 : */
1341 : void
1342 : psql_scan_finish(PsqlScanState state)
1343 531898 : {
1344 : /* Drop any incomplete variable expansions. */
1345 : while (state->buffer_stack != NULL)
1346 531898 : psqlscan_pop_buffer_stack(state);
1347 0 :
1348 : /* Done with the outer scan buffer, too */
1349 : if (state->scanbufhandle)
1350 531898 : yy_delete_buffer(state->scanbufhandle, state->scanner);
1351 521610 : state->scanbufhandle = NULL;
1352 531898 : if (state->scanbuf)
1353 531898 : free(state->scanbuf);
1354 521610 : state->scanbuf = NULL;
1355 531898 : }
1356 531898 :
1357 : /*
1358 : * Reset lexer scanning state to start conditions. This is appropriate
1359 : * for executing \r psql commands (or any other time that we discard the
1360 : * prior contents of query_buf). It is not, however, necessary to do this
1361 : * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1362 : * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1363 : * conditions are returned.
1364 : *
1365 : * Note that this is unrelated to flushing unread input; that task is
1366 : * done by psql_scan_finish().
1367 : */
1368 : void
1369 : psql_scan_reset(PsqlScanState state)
1370 23577 : {
1371 : state->start_state = INITIAL;
1372 23577 : state->paren_depth = 0;
1373 23577 : state->xcdepth = 0; /* not really necessary */
1374 23577 : if (state->dolqstart)
1375 23577 : free(state->dolqstart);
1376 0 : state->dolqstart = NULL;
1377 23577 : state->begin_depth = 0;
1378 23577 : state->init_idents_count = 0;
1379 23577 : }
1380 23577 :
1381 : /*
1382 : * Reselect this lexer (psqlscan.l) after using another one.
1383 : *
1384 : * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
1385 : * state, because we'd never switch to another lexer in a different state.
1386 : * However, we don't want to reset e.g. paren_depth, so this can't be
1387 : * the same as psql_scan_reset().
1388 : *
1389 : * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1390 : * must be a superset of this.
1391 : *
1392 : * Note: it seems likely that other lexers could just assign INITIAL for
1393 : * themselves, since that probably has the value zero in every flex-generated
1394 : * lexer. But let's not assume that.
1395 : */
1396 : void
1397 : psql_scan_reselect_sql_lexer(PsqlScanState state)
1398 155564 : {
1399 : state->start_state = INITIAL;
1400 155564 : }
1401 155564 :
1402 : /*
1403 : * Return true if lexer is currently in an "inside quotes" state.
1404 : *
1405 : * This is pretty grotty but is needed to preserve the old behavior
1406 : * that mainloop.c drops blank lines not inside quotes without even
1407 : * echoing them.
1408 : */
1409 : bool
1410 : psql_scan_in_quote(PsqlScanState state)
1411 100382 : {
1412 : return state->start_state != INITIAL &&
1413 101000 : state->start_state != xqs;
1414 618 : }
1415 :
1416 : /*
1417 : * Return the current scanning location (end+1 of last scanned token),
1418 : * as a line number counted from 1 and an offset from string start.
1419 : *
1420 : * This considers only the outermost input string, and therefore is of
1421 : * limited use for programs that use psqlscan_push_new_buffer().
1422 : *
1423 : * It would be a bit easier probably to use "%option yylineno" to count
1424 : * lines, but the flex manual says that has a performance cost, and only
1425 : * a minority of programs using psqlscan have need for this functionality.
1426 : * So we implement it ourselves without adding overhead to the lexer itself.
1427 : */
1428 : void
1429 : psql_scan_get_location(PsqlScanState state,
1430 1737 : int *lineno, int *offset)
1431 : {
1432 : const char *line_end;
1433 :
1434 : /*
1435 : * We rely on flex's having stored a NUL after the current token in
1436 : * scanbuf. Therefore we must specially handle the state before yylex()
1437 : * has been called, when obviously that won't have happened yet.
1438 : */
1439 : if (state->cur_line_no == 0)
1440 1737 : {
1441 : *lineno = 1;
1442 0 : *offset = 0;
1443 0 : return;
1444 0 : }
1445 :
1446 : /*
1447 : * Advance cur_line_no/cur_line_ptr past whatever has been lexed so far.
1448 : * Doing this prevents repeated calls from being O(N^2) for long inputs.
1449 : */
1450 : while ((line_end = strchr(state->cur_line_ptr, '\n')) != NULL)
1451 2210 : {
1452 : state->cur_line_no++;
1453 473 : state->cur_line_ptr = line_end + 1;
1454 473 : }
1455 : state->cur_line_ptr += strlen(state->cur_line_ptr);
1456 1737 :
1457 : /* Report current location. */
1458 : *lineno = state->cur_line_no;
1459 1737 : *offset = state->cur_line_ptr - state->scanbuf;
1460 1737 : }
1461 :
1462 : /*
1463 : * Push the given string onto the stack of stuff to scan.
1464 : *
1465 : * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1466 : */
1467 : void
1468 : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
1469 883 : const char *varname)
1470 : {
1471 : StackElem *stackelem;
1472 :
1473 : stackelem = pg_malloc_object(StackElem);
1474 883 :
1475 : /*
1476 : * In current usage, the passed varname points at the current flex input
1477 : * buffer; we must copy it before calling psqlscan_prepare_buffer()
1478 : * because that will change the buffer state.
1479 : */
1480 : stackelem->varname = varname ? pg_strdup(varname) : NULL;
1481 883 :
1482 : stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
1483 883 : &stackelem->bufstring);
1484 : state->curline = stackelem->bufstring;
1485 883 : if (state->safe_encoding)
1486 883 : {
1487 : stackelem->origstring = NULL;
1488 883 : state->refline = stackelem->bufstring;
1489 883 : }
1490 : else
1491 : {
1492 : stackelem->origstring = pg_strdup(newstr);
1493 0 : state->refline = stackelem->origstring;
1494 0 : }
1495 : stackelem->next = state->buffer_stack;
1496 883 : state->buffer_stack = stackelem;
1497 883 : }
1498 883 :
1499 : /*
1500 : * Pop the topmost buffer stack item (there must be one!)
1501 : *
1502 : * NB: after this, the flex input state is unspecified; caller must
1503 : * switch to an appropriate buffer to continue lexing.
1504 : * See psqlscan_select_top_buffer().
1505 : */
1506 : void
1507 : psqlscan_pop_buffer_stack(PsqlScanState state)
1508 883 : {
1509 : StackElem *stackelem = state->buffer_stack;
1510 883 :
1511 : state->buffer_stack = stackelem->next;
1512 883 : yy_delete_buffer(stackelem->buf, state->scanner);
1513 883 : free(stackelem->bufstring);
1514 883 : if (stackelem->origstring)
1515 883 : free(stackelem->origstring);
1516 0 : if (stackelem->varname)
1517 883 : free(stackelem->varname);
1518 883 : free(stackelem);
1519 883 : }
1520 883 :
1521 : /*
1522 : * Select the topmost surviving buffer as the active input.
1523 : */
1524 : void
1525 : psqlscan_select_top_buffer(PsqlScanState state)
1526 883 : {
1527 : StackElem *stackelem = state->buffer_stack;
1528 883 :
1529 : if (stackelem != NULL)
1530 883 : {
1531 : yy_switch_to_buffer(stackelem->buf, state->scanner);
1532 0 : state->curline = stackelem->bufstring;
1533 0 : state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
1534 0 : }
1535 : else
1536 : {
1537 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1538 883 : state->curline = state->scanbuf;
1539 883 : state->refline = state->scanline;
1540 883 : }
1541 : }
1542 883 :
1543 : /*
1544 : * Check if specified variable name is the source for any string
1545 : * currently being scanned
1546 : */
1547 : bool
1548 : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
1549 883 : {
1550 : StackElem *stackelem;
1551 :
1552 : for (stackelem = state->buffer_stack;
1553 883 : stackelem != NULL;
1554 883 : stackelem = stackelem->next)
1555 0 : {
1556 : if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1557 0 : return true;
1558 0 : }
1559 : return false;
1560 883 : }
1561 :
1562 : /*
1563 : * Set up a flex input buffer to scan the given data. We always make a
1564 : * copy of the data. If working in an unsafe encoding, the copy has
1565 : * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1566 : *
1567 : * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1568 : */
1569 : YY_BUFFER_STATE
1570 : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
1571 522548 : char **txtcopy)
1572 : {
1573 : char *newtxt;
1574 :
1575 : /* Flex wants two \0 characters after the actual data */
1576 : newtxt = pg_malloc_array(char, (len + 2));
1577 522548 : *txtcopy = newtxt;
1578 522548 : newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1579 522548 :
1580 : if (state->safe_encoding)
1581 522548 : memcpy(newtxt, txt, len);
1582 522408 : else
1583 : {
1584 : /* Gotta do it the hard way */
1585 : int i = 0;
1586 140 :
1587 : while (i < len)
1588 808 : {
1589 : int thislen = PQmblen(txt + i, state->encoding);
1590 668 :
1591 : /* first byte should always be okay... */
1592 : newtxt[i] = txt[i];
1593 668 : i++;
1594 668 : while (--thislen > 0 && i < len)
1595 808 : newtxt[i++] = (char) 0xFF;
1596 140 : }
1597 : }
1598 :
1599 : return yy_scan_buffer(newtxt, len + 2, state->scanner);
1600 522548 : }
1601 :
1602 : /*
1603 : * psqlscan_emit() --- body for ECHO macro
1604 : *
1605 : * NB: this must be used for ALL and ONLY the text copied from the flex
1606 : * input data. If you pass it something that is not part of the yytext
1607 : * string, you are making a mistake. Internally generated text can be
1608 : * appended directly to state->output_buf.
1609 : */
1610 : void
1611 : psqlscan_emit(PsqlScanState state, const char *txt, int len)
1612 6602050 : {
1613 : PQExpBuffer output_buf = state->output_buf;
1614 6602050 :
1615 : if (state->safe_encoding)
1616 6602050 : appendBinaryPQExpBuffer(output_buf, txt, len);
1617 6601574 : else
1618 : {
1619 : /* Gotta do it the hard way */
1620 : const char *reference = state->refline;
1621 476 : int i;
1622 :
1623 : reference += (txt - state->curline);
1624 476 :
1625 : for (i = 0; i < len; i++)
1626 1277 : {
1627 : char ch = txt[i];
1628 801 :
1629 : if (ch == (char) 0xFF)
1630 801 : ch = reference[i];
1631 140 : appendPQExpBufferChar(output_buf, ch);
1632 801 : }
1633 : }
1634 : }
1635 6602050 :
1636 : /*
1637 : * psqlscan_extract_substring --- fetch value of (part of) the current token
1638 : *
1639 : * This is like psqlscan_emit(), except that the data is returned as a
1640 : * malloc'd string rather than being pushed directly to state->output_buf.
1641 : */
1642 : char *
1643 : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
1644 3391 : {
1645 : char *result = pg_malloc_array(char, (len + 1));
1646 3391 :
1647 : if (state->safe_encoding)
1648 3391 : memcpy(result, txt, len);
1649 3391 : else
1650 : {
1651 : /* Gotta do it the hard way */
1652 : const char *reference = state->refline;
1653 0 : int i;
1654 :
1655 : reference += (txt - state->curline);
1656 0 :
1657 : for (i = 0; i < len; i++)
1658 0 : {
1659 : char ch = txt[i];
1660 0 :
1661 : if (ch == (char) 0xFF)
1662 0 : ch = reference[i];
1663 0 : result[i] = ch;
1664 0 : }
1665 : }
1666 : result[len] = '\0';
1667 3391 : return result;
1668 3391 : }
1669 :
1670 : /*
1671 : * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1672 : *
1673 : * If the variable name is found, escape its value using the appropriate
1674 : * quoting method and emit the value to output_buf. (Since the result is
1675 : * surely quoted, there is never any reason to rescan it.) If we don't
1676 : * find the variable or escaping fails, emit the token as-is.
1677 : */
1678 : void
1679 : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
1680 721 : PsqlScanQuoteType quote)
1681 : {
1682 : char *varname;
1683 : char *value;
1684 :
1685 : /* Variable lookup. */
1686 : varname = psqlscan_extract_substring(state, txt + 2, len - 3);
1687 721 : if (state->callbacks->get_variable)
1688 721 : value = state->callbacks->get_variable(varname, quote,
1689 721 : state->cb_passthrough);
1690 : else
1691 : value = NULL;
1692 0 : free(varname);
1693 721 :
1694 : if (value)
1695 721 : {
1696 : /* Emit the suitably-escaped value */
1697 : appendPQExpBufferStr(state->output_buf, value);
1698 684 : free(value);
1699 684 : }
1700 : else
1701 : {
1702 : /* Emit original token as-is */
1703 : psqlscan_emit(state, txt, len);
1704 37 : }
1705 : }
1706 721 :
1707 : void
1708 : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
1709 21 : {
1710 : char *varname;
1711 : char *value;
1712 :
1713 : varname = psqlscan_extract_substring(state, txt + 3, len - 4);
1714 21 : if (state->callbacks->get_variable)
1715 21 : value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
1716 21 : state->cb_passthrough);
1717 : else
1718 : value = NULL;
1719 0 : free(varname);
1720 21 :
1721 : if (value != NULL)
1722 21 : {
1723 : appendPQExpBufferStr(state->output_buf, "TRUE");
1724 9 : free(value);
1725 9 : }
1726 : else
1727 : {
1728 : appendPQExpBufferStr(state->output_buf, "FALSE");
1729 12 : }
1730 : }
1731 21 : /* END: function "psqlscan_test_variable" */
|