Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * psqlscan.l
5 : * lexical scanner for SQL commands
6 : *
7 : * This lexer used to be part of psql, and that heritage is reflected in
8 : * the file name as well as function and typedef names, though it can now
9 : * be used by other frontend programs as well. It's also possible to extend
10 : * this lexer with a compatible add-on lexer to handle program-specific
11 : * backslash commands.
12 : *
13 : * This code is mainly concerned with determining where the end of a SQL
14 : * statement is: we are looking for semicolons that are not within quotes,
15 : * comments, or parentheses. The most reliable way to handle this is to
16 : * borrow the backend's flex lexer rules, lock, stock, and barrel. The rules
17 : * below are (except for a few) the same as the backend's, but their actions
18 : * are just ECHO whereas the backend's actions generally do other things.
19 : *
20 : * XXX The rules in this file must be kept in sync with the backend lexer!!!
21 : *
22 : * XXX Avoid creating backtracking cases --- see the backend lexer for info.
23 : *
24 : * See psqlscan_int.h for additional commentary.
25 : *
26 : *
27 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
28 : * Portions Copyright (c) 1994, Regents of the University of California
29 : *
30 : * IDENTIFICATION
31 : * src/fe_utils/psqlscan.l
32 : *
33 : *-------------------------------------------------------------------------
34 : */
35 : #include "postgres_fe.h"
36 :
37 : #include "common/logging.h"
38 : #include "fe_utils/psqlscan.h"
39 :
40 : #include "libpq-fe.h"
41 : }
42 :
43 : %{
44 :
45 : /* LCOV_EXCL_START */
46 :
47 : #include "fe_utils/psqlscan_int.h"
48 :
49 : /*
50 : * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
51 : * doesn't presently make use of that argument, so just declare it as int.
52 : */
53 : typedef int YYSTYPE;
54 :
55 : /*
56 : * Set the type of yyextra; we use it as a pointer back to the containing
57 : * PsqlScanState.
58 : */
59 : #define YY_EXTRA_TYPE PsqlScanState
60 :
61 :
62 : /* Return values from yylex() */
63 : #define LEXRES_EOL 0 /* end of input */
64 : #define LEXRES_SEMI 1 /* command-terminating semicolon found */
65 : #define LEXRES_BACKSLASH 2 /* backslash command start */
66 :
67 :
/*
 * Replace the default ECHO action of flex: every rule that says ECHO passes
 * the current token (yytext, yyleng) to psqlscan_emit().  The macro expects
 * a variable named cur_state to be in scope; it is declared at the top of
 * the rules section.
 */
68 : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
69 :
70 : /*
71 : * Work around a bug in flex 2.5.35: it emits a couple of functions that
72 : * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
73 : * this would cause warnings. Providing our own declarations should be
74 : * harmless even when the bug gets fixed.
75 : */
76 : extern int psql_yyget_column(yyscan_t yyscanner);
77 : extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
78 :
79 : %}
80 :
/*
 * Scanner generation options:
 *   reentrant          keep all scanner state in a yyscan_t object, no globals
 *   bison-bridge       yylex() takes a YYSTYPE pointer as its first argument
 *   8bit               accept input bytes that have the high bit set
 *   never-interactive  do not probe the input with isatty()
 *   nodefault          no implicit default rule; the rules must cover all input
 *   noinput, nounput   do not generate the unused input() and unput() functions
 *   noyywrap           do not call yywrap() at end of input
 *   warn               have flex report questionable constructs
 *   prefix             generated symbols are named psql_yy... instead of yy...
 */
81 : %option reentrant
82 : %option bison-bridge
83 : %option 8bit
84 : %option never-interactive
85 : %option nodefault
86 : %option noinput
87 : %option nounput
88 : %option noyywrap
89 : %option warn
90 : %option prefix="psql_yy"
91 :
92 : /*
93 : * All of the following definitions and rules should exactly match
94 : * src/backend/parser/scan.l so far as the flex patterns are concerned.
95 : * The rule bodies are just ECHO as opposed to what the backend does,
96 : * however. (But be sure to duplicate code that affects the lexing process,
97 : * such as BEGIN() and yyless().) Also, psqlscan uses a single <<EOF>> rule
98 : * whereas scan.l has a separate one for each exclusive state.
99 : */
100 :
101 : /*
102 : * OK, here is a short description of lex/flex rules behavior.
103 : * The longest pattern which matches an input string is always chosen.
104 : * For equal-length patterns, the first occurring in the rules list is chosen.
105 : * INITIAL is the starting state, to which all non-conditional rules apply.
106 : * Exclusive states change parsing rules while the state is active. When in
107 : * an exclusive state, only those rules defined for that state apply.
108 : *
109 : * We use exclusive states for quoted strings, extended comments,
110 : * and to eliminate parsing troubles for numeric strings.
111 : * Exclusive states:
112 : * <xb> bit string literal
113 : * <xc> extended C-style comments
114 : * <xd> delimited identifiers (double-quoted identifiers)
115 : * <xh> hexadecimal byte string
116 : * <xq> standard quoted strings
117 : * <xqs> quote stop (detect continued strings)
118 : * <xe> extended quoted strings (support backslash escape sequences)
119 : * <xdolq> $foo$ quoted strings
120 : * <xui> quoted identifier with Unicode escapes
121 : * <xus> quoted string with Unicode escapes
122 : *
123 : * Note: we intentionally don't mimic the backend's <xeu> state; we have
124 : * no need to distinguish it from <xe> state, and no good way to get out
125 : * of it in error cases. The backend just throws yyerror() in those
126 : * cases, but that's not an option here.
127 : */
128 :
/* Declare the exclusive start conditions listed in the comment above. */
129 : %x xb
130 : %x xc
131 : %x xd
132 : %x xh
133 : %x xq
134 : %x xqs
135 : %x xe
136 : %x xdolq
137 : %x xui
138 : %x xus
139 :
140 : /*
141 : * In order to make the world safe for Windows and Mac clients as well as
142 : * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
143 : * sequence will be seen as two successive newlines, but that doesn't cause
144 : * any problems. Comments that start with -- and extend to the next
145 : * newline are treated as equivalent to a single whitespace character.
146 : *
147 : * NOTE a fine point: if there is no newline following --, we will absorb
148 : * everything to the end of the input as a comment. This is correct. Older
149 : * versions of Postgres failed to recognize -- as a comment if the input
150 : * did not end with a newline.
151 : *
152 : * non_newline_space tracks all space characters except newlines.
153 : *
154 : * XXX if you change the set of whitespace characters, fix scanner_isspace()
155 : * to agree.
156 : */
157 :
158 : space [ \t\n\r\f\v]
159 : non_newline_space [ \t\f\v]
160 : newline [\n\r]
161 : non_newline [^\n\r]
162 :
/* A dash-dash comment runs up to, but does not include, the next newline */
163 : comment ("--"{non_newline}*)
164 :
/* Either a run of space characters or a single dash-dash comment */
165 : whitespace ({space}+|{comment})
166 :
167 : /*
168 : * SQL requires at least one newline in the whitespace separating
169 : * string literals that are to be concatenated. Silly, but who are we
170 : * to argue? Note that {whitespace_with_newline} should not have * after
171 : * it, whereas {whitespace} should generally have a * after it...
 *
 * whitespace_with_newline is built as: any amount of whitespace that
 * contains no newline, then one newline, then any further whitespace
 * (where a comment only counts together with its terminating newline).
172 : */
173 :
174 : special_whitespace ({space}+|{comment}{newline})
175 : non_newline_whitespace ({non_newline_space}|{comment})
176 : whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
177 :
178 : quote '
179 : /* If we see {quote} then {quotecontinue}, the quoted string continues */
180 : quotecontinue {whitespace_with_newline}{quote}
181 :
182 : /*
183 : * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
184 : * {quotecontinue}. It might seem that this could just be {whitespace}*,
185 : * but if there's a dash after {whitespace_with_newline}, it must be consumed
186 : * to see if there's another dash --- which would start a {comment} and thus
187 : * allow continuation of the {quotecontinue} token.
188 : */
189 : quotecontinuefail {whitespace}*"-"?
190 :
/*
 * The definitions from here down to xufailed describe the quoted literal and
 * quoted identifier forms.  Each group has a start pattern plus patterns for
 * the text found inside the quotes.  dolqfailed and xufailed (and presumably
 * xeunicodefail as well) match incomplete tokens so that the scanner never
 * has to back up.
 */
191 : /* Bit string
192 : * It is tempting to scan the string for only those characters
193 : * which are allowed. However, this leads to silently swallowed
194 : * characters if illegal characters are included in the string.
195 : * For example, if xbinside is [01] then B'ABCD' is interpreted
196 : * as a zero-length string, and the ABCD' is lost!
197 : * Better to pass the string forward and let the input routines
198 : * validate the contents.
199 : */
200 : xbstart [bB]{quote}
201 : xbinside [^']*
202 :
203 : /* Hexadecimal byte string */
204 : xhstart [xX]{quote}
205 : xhinside [^']*
206 :
207 : /* National character */
208 : xnstart [nN]{quote}
209 :
210 : /* Quoted string that allows backslash escapes */
211 : xestart [eE]{quote}
212 : xeinside [^\\']+
213 : xeescape [\\][^0-7]
214 : xeoctesc [\\][0-7]{1,3}
215 : xehexesc [\\]x[0-9A-Fa-f]{1,2}
216 : xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
217 : xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
218 :
219 : /* Extended quote
220 : * xqdouble implements embedded quote, ''''
221 : */
222 : xqstart {quote}
223 : xqdouble {quote}{quote}
224 : xqinside [^']+
225 :
226 : /* $foo$ style quotes ("dollar quoting")
227 : * The quoted string starts with $foo$ where "foo" is an optional string
228 : * in the form of an identifier, except that it may not contain "$",
229 : * and extends to the first occurrence of an identical string.
230 : * There is *no* processing of the quoted text.
231 : *
232 : * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
233 : * fails to match its trailing "$".
234 : */
235 : dolq_start [A-Za-z\200-\377_]
236 : dolq_cont [A-Za-z\200-\377_0-9]
237 : dolqdelim \$({dolq_start}{dolq_cont}*)?\$
238 : dolqfailed \${dolq_start}{dolq_cont}*
239 : dolqinside [^$]+
240 :
241 : /* Double quote
242 : * Allows embedded spaces and other special characters into identifiers.
243 : */
244 : dquote \"
245 : xdstart {dquote}
246 : xdstop {dquote}
247 : xddouble {dquote}{dquote}
248 : xdinside [^"]+
249 :
250 : /* Quoted identifier with Unicode escapes */
251 : xuistart [uU]&{dquote}
252 :
253 : /* Quoted string with Unicode escapes */
254 : xusstart [uU]&{quote}
255 :
256 : /* error rule to avoid backup */
257 : xufailed [uU]&
258 :
259 :
260 : /* C-style comments
261 : *
262 : * The "extended comment" syntax closely resembles allowable operator syntax.
263 : * The tricky part here is to get lex to recognize a string starting with
264 : * slash-star as a comment, when interpreting it as an operator would produce
265 : * a longer match --- remember lex will prefer a longer match! Also, if we
266 : * have something like plus-slash-star, lex will think this is a 3-character
267 : * operator whereas we want to see it as a + operator and a comment start.
268 : * The solution is two-fold:
269 : * 1. append {op_chars}* to xcstart so that it matches as much text as
270 : * {operator} would. Then the tie-breaker (first matching rule of same
271 : * length) ensures xcstart wins. We put back the extra stuff with yyless()
272 : * in case it contains a star-slash that should terminate the comment.
273 : * 2. In the operator rule, check for slash-star within the operator, and
274 : * if found throw it back with yyless(). This handles the plus-slash-star
275 : * problem.
276 : * Dash-dash comments have similar interactions with the operator rule.
 *
 * xcinside excludes both star and slash, so those two characters are only
 * ever consumed by xcstart, xcstop or the single-character rules that the
 * xc state provides.
277 : */
278 : xcstart \/\*{op_chars}*
279 : xcstop \*+\/
280 : xcinside [^*/]+
281 :
/*
 * Identifiers start with a letter, an underscore or a high-bit byte, and
 * continue with any of those plus digits and dollar signs.
 */
282 : ident_start [A-Za-z\200-\377_]
283 : ident_cont [A-Za-z\200-\377_0-9\$]
284 :
285 : identifier {ident_start}{ident_cont}*
286 :
287 : /* Assorted special-case operators and operator-like tokens */
288 : typecast "::"
289 : dot_dot \.\.
290 : colon_equals ":="
291 :
292 : /*
293 : * These operator-like tokens (unlike the above ones) also match the {operator}
294 : * rule, which means that they might be overridden by a longer match if they
295 : * are followed by a comment start or a + or - character. Accordingly, if you
296 : * add to this list, you must also add corresponding code to the {operator}
297 : * block to return the correct token in such cases. (This is not needed in
298 : * psqlscan.l since the token value is ignored there.)
299 : */
300 : equals_greater "=>"
301 : less_equals "<="
302 : greater_equals ">="
303 : less_greater "<>"
304 : not_equals "!="
305 :
306 : /*
307 : * "self" is the set of chars that should be returned as single-character
308 : * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
309 : * which can be one or more characters long (but if a single-char token
310 : * appears in the "self" set, it is not to be returned as an Op). Note
311 : * that the sets overlap, but each has some chars that are not in the other.
312 : *
313 : * If you change either set, adjust the character lists appearing in the
314 : * rule for "operator"!
315 : */
316 : self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
317 : op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
318 : operator {op_chars}+
319 :
320 : /*
321 : * Numbers
322 : *
323 : * Unary minus is not part of a number here. Instead we pass it separately to
324 : * the parser, and there it gets coerced via doNegate().
325 : *
326 : * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
327 : *
328 : * {realfail} is added to prevent the need for scanner
329 : * backup when the {real} rule fails to match completely.
 *
 * (The remarks about the parser describe the backend lexer these patterns
 * are shared with; the rule actions in this file only echo the token.)
330 : */
331 : decdigit [0-9]
332 : hexdigit [0-9A-Fa-f]
333 : octdigit [0-7]
334 : bindigit [0-1]
335 :
336 : decinteger {decdigit}(_?{decdigit})*
337 : hexinteger 0[xX](_?{hexdigit})+
338 : octinteger 0[oO](_?{octdigit})+
339 : bininteger 0[bB](_?{bindigit})+
340 :
/*
 * A radix prefix, optionally followed by one underscore, with no digit after
 * it.  Presumably, like realfail, these exist so that the scanner need not
 * back up when the corresponding integer pattern fails to match.
 */
341 : hexfail 0[xX]_?
342 : octfail 0[oO]_?
343 : binfail 0[bB]_?
344 :
345 : numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
346 : numericfail {decinteger}\.\.
347 :
348 : real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
349 : realfail ({decinteger}|{numeric})[Ee][-+]
350 :
351 : /* Positional parameters don't accept underscores. */
352 : param \${decdigit}+
353 :
354 : /*
355 : * An identifier immediately following an integer literal is disallowed because
356 : * in some cases it's ambiguous what is meant: for example, 0x1234 could be
357 : * either a hexinteger or a decinteger "0" and an identifier "x1234". We can
358 : * detect such problems by seeing if integer_junk matches a longer substring
359 : * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
360 : * bininteger). One "junk" pattern is sufficient because
361 : * {decinteger}{identifier} will match all the same strings we'd match with
362 : * {hexinteger}{identifier} etc.
363 : *
364 : * Note that the rule for integer_junk must appear after the ones for
365 : * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
366 : * and integer_junk, and we need hexinteger to be chosen in that case.
367 : *
368 : * Also disallow strings matched by numeric_junk, real_junk and param_junk
369 : * for consistency.
370 : */
371 : integer_junk {decinteger}{identifier}
372 : numeric_junk {numeric}{identifier}
373 : real_junk {real}{identifier}
374 : param_junk \${decdigit}+{identifier}
375 :
376 : /* psql-specific: characters allowed in variable names */
377 : variable_char [A-Za-z\200-\377_0-9]
378 :
/* Catch-all: any single character other than a newline */
379 : other .
380 :
381 : /*
382 : * Dollar quoted strings are totally opaque, and no escaping is done on them.
383 : * Other quoted strings must allow some special characters such as single-quote
384 : * and newline.
385 : * Embedded single-quotes are implemented both in the SQL standard
386 : * style of two adjacent single quotes "''" and in the Postgres/Java style
387 : * of escaped-quote "\'".
388 : * Other embedded escaped characters are matched explicitly and the leading
389 : * backslash is dropped from the string.
390 : * Note that xcstart must appear before operator, as explained above!
391 : * Also whitespace (comment) must appear before operator.
392 : */
393 :
394 : %%
395 :
396 : %{
397 : /* Declare some local variables inside yylex(), for convenience */
	/* Note: the ECHO macro defined earlier refers to cur_state by name. */
398 : PsqlScanState cur_state = yyextra;
399 : PQExpBuffer output_buf = cur_state->output_buf;
400 :
401 : /*
402 : * Force flex into the state indicated by start_state. This has a
403 : * couple of purposes: it lets some of the functions below set a new
404 : * starting state without ugly direct access to flex variables, and it
405 : * allows us to transition from one flex lexer to another so that we
406 : * can lex different parts of the source string using separate lexers.
407 : */
408 : BEGIN(cur_state->start_state);
409 : %}
410 :
411 : {whitespace} {
412 : /*
413 : * Note that the whitespace rule includes both true
414 : * whitespace and single-line ("--" style) comments.
415 : * We suppress whitespace until we have collected some
416 : * non-whitespace data. (This interacts with some
417 : * decisions in MainLoop(); see there for details.)
418 : */
419 : if (output_buf->len > 0)
420 : ECHO;
421 : }
422 :
423 : {xcstart} {
	/*
	 * Entering the outermost C-style comment.  xcdepth counts only
	 * the nesting levels inside it, so it starts from zero.
	 */
424 : cur_state->xcdepth = 0;
425 : BEGIN(xc);
426 : /* Put back any characters past slash-star; see above */
427 : yyless(2);
428 : ECHO;
429 : }
430 :
431 : <xc>{
432 : {xcstart} {
	/* A nested comment start: go one level deeper */
433 : cur_state->xcdepth++;
434 : /* Put back any characters past slash-star; see above */
435 : yyless(2);
436 : ECHO;
437 : }
438 :
439 : {xcstop} {
	/*
	 * At depth zero this closes the outermost comment, so normal
	 * scanning resumes; otherwise it only closes a nested comment.
	 */
440 : if (cur_state->xcdepth <= 0)
441 : BEGIN(INITIAL);
442 : else
443 : cur_state->xcdepth--;
444 : ECHO;
445 : }
446 :
447 : {xcinside} {
	/* Comment text containing neither star nor slash */
448 : ECHO;
449 : }
450 :
451 : {op_chars} {
	/*
	 * A single operator character inside the comment; this is how
	 * a slash that does not begin a nested comment gets consumed.
	 */
452 : ECHO;
453 : }
454 :
455 : \*+ {
	/* A run of stars that is not followed by a slash */
456 : ECHO;
457 : }
458 : } /* <xc> */
459 :
460 : {xbstart} {
461 : BEGIN(xb);
462 : ECHO;
463 : }
464 : <xh>{xhinside} |
465 : <xb>{xbinside} {
466 : ECHO;
467 : }
468 :
469 : {xhstart} {
470 : /* Hexadecimal bit type.
471 : * At some point we should simply pass the string
472 : * forward to the parser and label it there.
473 : * In the meantime, place a leading "x" on the string
474 : * to mark it for the input routine as a hex string.
	 *
	 * (The wording above describes the backend lexer this rule
	 * is shared with; the action here only switches to the xh
	 * state and copies the token through.)
475 : */
476 : BEGIN(xh);
477 : ECHO;
478 : }
479 :
480 : {xnstart} {
	/*
	 * Only the n or N is consumed here; the quote is pushed back
	 * and is rescanned as the start of an ordinary quoted string.
	 */
481 : yyless(1); /* eat only 'n' this time */
482 : ECHO;
483 : }
484 :
485 : {xqstart} {
	/*
	 * With standard-conforming strings a plain quoted string is
	 * scanned in the xq state; otherwise the xe state is used,
	 * where backslash escapes are recognized.
	 */
486 : if (cur_state->std_strings)
487 : BEGIN(xq);
488 : else
489 : BEGIN(xe);
490 : ECHO;
491 : }
492 : {xestart} {
493 : BEGIN(xe);
494 : ECHO;
495 : }
496 : {xusstart} {
497 : BEGIN(xus);
498 : ECHO;
499 : }
500 :
501 : <xb,xh,xq,xe,xus>{quote} {
502 : /*
503 : * When we are scanning a quoted string and see an end
504 : * quote, we must look ahead for a possible continuation.
505 : * If we don't see one, we know the end quote was in fact
506 : * the end of the string. To reduce the lexer table size,
507 : * we use a single "xqs" state to do the lookahead for all
508 : * types of strings.
509 : */
	/* Remember which kind of literal we were in, for quotecontinue */
510 : cur_state->state_before_str_stop = YYSTATE;
511 : BEGIN(xqs);
512 : ECHO;
513 : }
514 : <xqs>{quotecontinue} {
515 : /*
516 : * Found a quote continuation, so return to the in-quote
517 : * state and continue scanning the literal. Nothing is
518 : * added to the literal's contents.
519 : */
520 : BEGIN(cur_state->state_before_str_stop);
521 : ECHO;
522 : }
523 : <xqs>{quotecontinuefail} |
524 : <xqs>{other} {
525 : /*
526 : * Failed to see a quote continuation. Throw back
527 : * everything after the end quote, and handle the string
528 : * according to the state we were in previously.
	 *
	 * (In this lexer that just means going back to INITIAL:
	 * the end quote itself was already echoed by the rule
	 * that entered the xqs state.)
529 : */
530 : yyless(0);
531 : BEGIN(INITIAL);
532 : /* There's nothing to echo ... */
533 : }
534 :
535 : <xq,xe,xus>{xqdouble} {
	/*
	 * A doubled quote is an embedded quote character.  This rule
	 * and the ones after it, down to the catch-all for the xe
	 * state, copy string contents through unchanged; the separate
	 * patterns are there to keep the token boundaries identical
	 * to those of the backend lexer.
	 */
536 : ECHO;
537 : }
538 : <xq,xus>{xqinside} {
539 : ECHO;
540 : }
541 : <xe>{xeinside} {
542 : ECHO;
543 : }
544 : <xe>{xeunicode} {
545 : ECHO;
546 : }
547 : <xe>{xeunicodefail} {
	/* Unicode escape with too few hex digits; still just copied */
548 : ECHO;
549 : }
550 : <xe>{xeescape} {
551 : ECHO;
552 : }
553 : <xe>{xeoctesc} {
554 : ECHO;
555 : }
556 : <xe>{xehexesc} {
557 : ECHO;
558 : }
559 : <xe>. {
560 : /* This is only needed for \ just before EOF */
561 : ECHO;
562 : }
563 :
564 : {dolqdelim} {
	/*
	 * Opening delimiter of a dollar-quoted string.  Keep a copy of
	 * it so that the matching closing delimiter can be recognized.
	 */
565 : cur_state->dolqstart = pg_strdup(yytext);
566 : BEGIN(xdolq);
567 : ECHO;
568 : }
569 : {dolqfailed} {
570 : /* throw back all but the initial "$" */
571 : yyless(1);
572 : ECHO;
573 : }
574 : <xdolq>{dolqdelim} {
	/*
	 * A candidate closing delimiter.  It ends the string only if
	 * it is identical to the saved opening delimiter, in which
	 * case the saved copy is released.
	 */
575 : if (strcmp(yytext, cur_state->dolqstart) == 0)
576 : {
577 : free(cur_state->dolqstart);
578 : cur_state->dolqstart = NULL;
579 : BEGIN(INITIAL);
580 : }
581 : else
582 : {
583 : /*
584 : * When we fail to match $...$ to dolqstart, transfer
585 : * the $... part to the output, but put back the final
586 : * $ for rescanning. Consider $delim$...$junk$delim$
587 : */
588 : yyless(yyleng - 1);
589 : }
	/* Echoes whatever is left of the token after any yyless() */
590 : ECHO;
591 : }
592 : <xdolq>{dolqinside} {
593 : ECHO;
594 : }
595 : <xdolq>{dolqfailed} {
	/*
	 * A dollar sign followed by identifier characters but with no
	 * closing dollar sign, so it cannot be a delimiter.
	 */
596 : ECHO;
597 : }
598 : <xdolq>. {
599 : /* This is only needed for $ inside the quoted text */
600 : ECHO;
601 : }
602 :
603 : {xdstart} {
	/* Start of a double-quoted identifier */
604 : BEGIN(xd);
605 : ECHO;
606 : }
607 : {xuistart} {
	/* Start of a double-quoted identifier with Unicode escapes */
608 : BEGIN(xui);
609 : ECHO;
610 : }
611 : <xd>{xdstop} {
	/*
	 * Closing double quote.  This rule and the next one match the
	 * same character; they differ only in the state they apply to.
	 */
612 : BEGIN(INITIAL);
613 : ECHO;
614 : }
615 : <xui>{dquote} {
616 : BEGIN(INITIAL);
617 : ECHO;
618 : }
619 : <xd,xui>{xddouble} {
	/* Two adjacent double quotes do not end the identifier */
620 : ECHO;
621 : }
622 : <xd,xui>{xdinside} {
623 : ECHO;
624 : }
625 :
626 : {xufailed} {
627 : /* throw back all but the initial u/U */
628 : yyless(1);
629 : ECHO;
630 : }
631 :
632 : {typecast} {
	/*
	 * This rule and the ones after it, down to not_equals, match
	 * multi-character operator-like tokens.  Which token was seen
	 * is of no interest to psqlscan, so each is copied through.
	 */
633 : ECHO;
634 : }
635 :
636 : {dot_dot} {
637 : ECHO;
638 : }
639 :
640 : {colon_equals} {
641 : ECHO;
642 : }
643 :
644 : {equals_greater} {
645 : ECHO;
646 : }
647 :
648 : {less_equals} {
649 : ECHO;
650 : }
651 :
652 : {greater_equals} {
653 : ECHO;
654 : }
655 :
656 : {less_greater} {
657 : ECHO;
658 : }
659 :
660 : {not_equals} {
661 : ECHO;
662 : }
663 :
664 : /*
665 : * These rules are specific to psql --- they implement parenthesis
666 : * counting and detection of command-ending semicolon. These must
667 : * appear before the {self} rule so that they take precedence over it.
668 : */
669 :
670 : "(" {
671 : cur_state->paren_depth++;
672 : ECHO;
673 : }
674 :
675 : ")" {
	/* Never let the depth go negative on unbalanced input */
676 : if (cur_state->paren_depth > 0)
677 : cur_state->paren_depth--;
678 : ECHO;
679 : }
680 :
681 : ";" {
	/*
	 * The semicolon is always copied to the output.  It ends the
	 * command only when we are outside all parentheses and outside
	 * any BEGIN ... END block tracked by the identifier rule.
	 */
682 : ECHO;
683 : if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
684 : {
685 : /* Terminate lexing temporarily */
	/*
	 * Save the flex state so that the next yylex() call resumes
	 * in it, and restart the leading-keyword tracking for the
	 * statement that follows.
	 */
686 : cur_state->start_state = YY_START;
687 : cur_state->identifier_count = 0;
688 : return LEXRES_SEMI;
689 : }
690 : }
691 :
692 : /*
693 : * psql-specific rules to handle backslash commands and variable
694 : * substitution. We want these before {self}, also.
695 : */
696 :
697 : "\\"[;:] {
698 : /* Force a semi-colon or colon into the query buffer */
	/*
	 * Only the character after the backslash is emitted.  An
	 * escaped semicolon does not end the scan, but it does
	 * restart the leading-keyword tracking.
	 */
699 : psqlscan_emit(cur_state, yytext + 1, 1);
700 : if (yytext[1] == ';')
701 : cur_state->identifier_count = 0;
702 : }
703 :
704 : "\\" {
705 : /* Terminate lexing temporarily */
	/* The backslash itself is not echoed */
706 : cur_state->start_state = YY_START;
707 : return LEXRES_BACKSLASH;
708 : }
709 :
710 : :{variable_char}+ {
711 : /* Possible psql variable substitution */
	/*
	 * varname is the token without its leading colon.  Both
	 * varname and any value returned by the callback are freed
	 * before this action ends.  If no get_variable callback is
	 * installed, the token is treated as an unknown variable.
	 */
712 : char *varname;
713 : char *value;
714 :
715 : varname = psqlscan_extract_substring(cur_state,
716 : yytext + 1,
717 : yyleng - 1);
718 : if (cur_state->callbacks->get_variable)
719 : value = cur_state->callbacks->get_variable(varname,
720 : PQUOTE_PLAIN,
721 : cur_state->cb_passthrough);
722 : else
723 : value = NULL;
724 :
725 : if (value)
726 : {
727 : /* It is a variable, check for recursion */
728 : if (psqlscan_var_is_current_source(cur_state, varname))
729 : {
730 : /* Recursive expansion --- don't go there */
731 : pg_log_warning("skipping recursive expansion of variable \"%s\"",
732 : varname);
733 : /* Instead copy the string as is */
734 : ECHO;
735 : }
736 : else
737 : {
738 : /* OK, perform substitution */
739 : psqlscan_push_new_buffer(cur_state, value, varname);
740 : /* yy_scan_string already made buffer active */
741 : }
742 : free(value);
743 : }
744 : else
745 : {
746 : /*
747 : * if the variable doesn't exist we'll copy the string
748 : * as is
749 : */
750 : ECHO;
751 : }
752 :
753 : free(varname);
754 : }
755 :
756 : :'{variable_char}+' {
	/*
	 * Quoted variable reference.  The whole token, including the
	 * colon and the quotes, is handed to psqlscan_escape_variable
	 * along with the requested quoting style: SQL literal here,
	 * SQL identifier in the next rule.  The rule after that hands
	 * the colon-brace-question-mark form to psqlscan_test_variable.
	 */
757 : psqlscan_escape_variable(cur_state, yytext, yyleng,
758 : PQUOTE_SQL_LITERAL);
759 : }
760 :
761 : :\"{variable_char}+\" {
762 : psqlscan_escape_variable(cur_state, yytext, yyleng,
763 : PQUOTE_SQL_IDENT);
764 : }
765 :
766 : :\{\?{variable_char}+\} {
767 : psqlscan_test_variable(cur_state, yytext, yyleng);
768 : }
769 :
770 : /*
771 : * These rules just avoid the need for scanner backup if one of the
772 : * three rules above fails to match completely.
	 * Each of them keeps only the colon, which is echoed, and pushes
	 * the rest of the match back to be rescanned.
773 : */
774 :
775 : :'{variable_char}* {
776 : /* Throw back everything but the colon */
777 : yyless(1);
778 : ECHO;
779 : }
780 :
781 : :\"{variable_char}* {
782 : /* Throw back everything but the colon */
783 : yyless(1);
784 : ECHO;
785 : }
786 :
787 : :\{\?{variable_char}* {
788 : /* Throw back everything but the colon */
789 : yyless(1);
790 : ECHO;
791 : }
792 : :\{ {
793 : /* Throw back everything but the colon */
794 : yyless(1);
795 : ECHO;
796 : }
797 :
798 : /*
799 : * Back to backend-compatible rules.
800 : */
801 :
802 : {self} {
803 : ECHO;
804 : }
805 :
806 : {operator} {
807 : /*
808 : * Check for embedded slash-star or dash-dash; those
809 : * are comment starts, so operator must stop there.
810 : * Note that slash-star or dash-dash at the first
811 : * character will match a prior rule, not this one.
	 *
	 * nchars ends up as the number of leading characters
	 * that are kept as the operator; anything beyond that
	 * is pushed back with yyless() and rescanned.
812 : */
813 : int nchars = yyleng;
814 : char *slashstar = strstr(yytext, "/*");
815 : char *dashdash = strstr(yytext, "--");
816 :
817 : if (slashstar && dashdash)
818 : {
819 : /* if both appear, take the first one */
820 : if (slashstar > dashdash)
821 : slashstar = dashdash;
822 : }
823 : else if (!slashstar)
824 : slashstar = dashdash;
825 : if (slashstar)
826 : nchars = slashstar - yytext;
827 :
828 : /*
829 : * For SQL compatibility, '+' and '-' cannot be the
830 : * last char of a multi-char operator unless the operator
831 : * contains chars that are not in SQL operators.
832 : * The idea is to lex '=-' as two operators, but not
833 : * to forbid operator names like '?-' that could not be
834 : * sequences of SQL operators.
835 : */
836 : if (nchars > 1 &&
837 : (yytext[nchars - 1] == '+' ||
838 : yytext[nchars - 1] == '-'))
839 : {
840 : int ic;
841 :
	/*
	 * Scan backwards for a character that is not used in SQL
	 * operators; the loop index stays non-negative only if
	 * one is found.
	 */
842 : for (ic = nchars - 2; ic >= 0; ic--)
843 : {
844 : char c = yytext[ic];
845 : if (c == '~' || c == '!' || c == '@' ||
846 : c == '#' || c == '^' || c == '&' ||
847 : c == '|' || c == '`' || c == '?' ||
848 : c == '%')
849 : break;
850 : }
851 : if (ic < 0)
852 : {
853 : /*
854 : * didn't find a qualifying character, so remove
855 : * all trailing [+-]
856 : */
857 : do {
858 : nchars--;
859 : } while (nchars > 1 &&
860 : (yytext[nchars - 1] == '+' ||
861 : yytext[nchars - 1] == '-'));
862 : }
863 : }
864 :
865 : if (nchars < yyleng)
866 : {
867 : /* Strip the unwanted chars from the token */
868 : yyless(nchars);
869 : }
870 : ECHO;
871 : }
872 :
873 : {param} {
	/*
	 * This rule and the ones after it, down to real_junk, cover
	 * positional parameters, numeric literals and their malformed
	 * variants.  Only the token boundaries matter to psqlscan, so
	 * every one of them is copied through; numericfail is the only
	 * one that has to push text back.
	 */
874 : ECHO;
875 : }
876 : {param_junk} {
877 : ECHO;
878 : }
879 :
880 : {decinteger} {
881 : ECHO;
882 : }
883 : {hexinteger} {
884 : ECHO;
885 : }
886 : {octinteger} {
887 : ECHO;
888 : }
889 : {bininteger} {
890 : ECHO;
891 : }
892 : {hexfail} {
893 : ECHO;
894 : }
895 : {octfail} {
896 : ECHO;
897 : }
898 : {binfail} {
899 : ECHO;
900 : }
901 : {numeric} {
902 : ECHO;
903 : }
904 : {numericfail} {
905 : /* throw back the .., and treat as integer */
906 : yyless(yyleng - 2);
907 : ECHO;
908 : }
909 : {real} {
910 : ECHO;
911 : }
912 : {realfail} {
913 : ECHO;
914 : }
915 : {integer_junk} {
916 : ECHO;
917 : }
918 : {numeric_junk} {
919 : ECHO;
920 : }
921 : {real_junk} {
922 : ECHO;
923 : }
924 :
925 :
926 : {identifier} {
927 : /*
928 : * We need to track if we are inside a BEGIN .. END block
929 : * in a function definition, so that semicolons contained
930 : * therein don't terminate the whole statement. Short of
931 : * writing a full parser here, the following heuristic
932 : * should work. First, we track whether the beginning of
933 : * the statement matches CREATE [OR REPLACE]
934 : * {FUNCTION|PROCEDURE}
935 : */
936 :
	/*
	 * identifier_count is set back to zero by the semicolon rules,
	 * so this clears the record at the first identifier of each
	 * statement.
	 */
937 : if (cur_state->identifier_count == 0)
938 : memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
939 :
	/*
	 * For the keywords of interest, store the lower-cased first
	 * letter at the slot given by this identifier's position in
	 * the statement.  Positions beyond the end of the array are
	 * not recorded; other identifiers leave their slot zero.
	 */
940 : if (pg_strcasecmp(yytext, "create") == 0 ||
941 : pg_strcasecmp(yytext, "function") == 0 ||
942 : pg_strcasecmp(yytext, "procedure") == 0 ||
943 : pg_strcasecmp(yytext, "or") == 0 ||
944 : pg_strcasecmp(yytext, "replace") == 0)
945 : {
946 : if (cur_state->identifier_count < sizeof(cur_state->identifiers))
947 : cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);
948 : }
949 :
950 : cur_state->identifier_count++;
951 :
	/*
	 * The recorded letters spell c-f or c-p for CREATE FUNCTION
	 * or PROCEDURE, and c-o-r-f or c-o-r-p for the OR REPLACE
	 * forms.  BEGIN, CASE and END are only counted in such a
	 * statement and only outside parentheses.
	 */
952 : if (cur_state->identifiers[0] == 'c' &&
953 : (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
954 : (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
955 : (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
956 : cur_state->paren_depth == 0)
957 : {
958 : if (pg_strcasecmp(yytext, "begin") == 0)
959 : cur_state->begin_depth++;
960 : else if (pg_strcasecmp(yytext, "case") == 0)
961 : {
962 : /*
963 : * CASE also ends with END. We only need to track
964 : * this if we are already inside a BEGIN.
965 : */
966 : if (cur_state->begin_depth >= 1)
967 : cur_state->begin_depth++;
968 : }
969 : else if (pg_strcasecmp(yytext, "end") == 0)
970 : {
	/* Guard against an END without a matching BEGIN */
971 : if (cur_state->begin_depth > 0)
972 : cur_state->begin_depth--;
973 : }
974 : }
975 :
976 : ECHO;
977 : }
978 :
979 : {other} {
980 : ECHO;
981 : }
982 :
983 : <<EOF>> {
	/*
	 * A single EOF rule for all start conditions.  With an empty
	 * buffer stack this is the real end of the input: save the
	 * current flex state so that a later yylex() call can resume
	 * in it, and report end of input.
	 */
984 : if (cur_state->buffer_stack == NULL)
985 : {
986 : cur_state->start_state = YY_START;
987 : return LEXRES_EOL; /* end of input reached */
988 : }
989 :
990 : /*
991 : * We were expanding a variable, so pop the inclusion
992 : * stack and keep lexing
993 : */
994 : psqlscan_pop_buffer_stack(cur_state);
995 : psqlscan_select_top_buffer(cur_state);
996 : }
997 :
998 : %%
999 :
1000 : /* LCOV_EXCL_STOP */
1001 :
1002 : /*
1003 : * Create a lexer working state struct.
1004 : *
1005 : * callbacks is a struct of function pointers that encapsulate some
1006 : * behavior we need from the surrounding program. This struct must
1007 : * remain valid for the lifespan of the PsqlScanState.
1008 : */
1009 : PsqlScanState
1010 : psql_scan_create(const PsqlScanCallbacks *callbacks)
1011 18380 : {
1012 : PsqlScanState state;
1013 :
1014 : state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
1015 18380 :
1016 : state->callbacks = callbacks;
1017 18380 :
1018 : yylex_init(&state->scanner);
1019 18380 :
1020 : yyset_extra(state, state->scanner);
1021 18380 :
1022 : psql_scan_reset(state);
1023 18380 :
1024 : return state;
1025 18380 : }
1026 :
1027 : /*
1028 : * Destroy a lexer working state struct, releasing all resources.
1029 : */
1030 : void
1031 : psql_scan_destroy(PsqlScanState state)
1032 18278 : {
1033 : psql_scan_finish(state);
1034 18278 :
1035 : psql_scan_reset(state);
1036 18278 :
1037 : yylex_destroy(state->scanner);
1038 18278 :
1039 : free(state);
1040 18278 : }
1041 18278 :
1042 : /*
1043 : * Set the callback passthrough pointer for the lexer.
1044 : *
1045 : * This could have been integrated into psql_scan_create, but keeping it
1046 : * separate allows the application to change the pointer later, which might
1047 : * be useful.
1048 : */
1049 : void
1050 : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
1051 17840 : {
1052 : state->cb_passthrough = passthrough;
1053 17840 : }
1054 17840 :
1055 : /*
1056 : * Set up to perform lexing of the given input line.
1057 : *
1058 : * The text at *line, extending for line_len bytes, will be scanned by
1059 : * subsequent calls to the psql_scan routines. psql_scan_finish should
1060 : * be called when scanning is complete. Note that the lexer retains
1061 : * a pointer to the storage at *line --- this string must not be altered
1062 : * or freed until after psql_scan_finish is called.
1063 : *
1064 : * encoding is the libpq identifier for the character encoding in use,
1065 : * and std_strings says whether standard_conforming_strings is on.
1066 : */
1067 : void
1068 : psql_scan_setup(PsqlScanState state,
1069 626798 : const char *line, int line_len,
1070 : int encoding, bool std_strings)
1071 : {
1072 : /* Mustn't be scanning already */
1073 : Assert(state->scanbufhandle == NULL);
1074 : Assert(state->buffer_stack == NULL);
1075 :
1076 : /* Do we need to hack the character set encoding? */
1077 : state->encoding = encoding;
1078 626798 : state->safe_encoding = pg_valid_server_encoding_id(encoding);
1079 626798 :
1080 : /* Save standard-strings flag as well */
1081 : state->std_strings = std_strings;
1082 626798 :
1083 : /* Set up flex input buffer with appropriate translation and padding */
1084 : state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
1085 626798 : &state->scanbuf);
1086 : state->scanline = line;
1087 626798 :
1088 : /* Set lookaside data in case we have to map unsafe encoding */
1089 : state->curline = state->scanbuf;
1090 626798 : state->refline = state->scanline;
1091 626798 : }
1092 626798 :
1093 : /*
1094 : * Do lexical analysis of SQL command text.
1095 : *
1096 : * The text previously passed to psql_scan_setup is scanned, and appended
1097 : * (possibly with transformation) to query_buf.
1098 : *
1099 : * The return value indicates the condition that stopped scanning:
1100 : *
1101 : * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
1102 : * transferred to query_buf.) The command accumulated in query_buf should
1103 : * be executed, then clear query_buf and call again to scan the remainder
1104 : * of the line.
1105 : *
1106 : * PSCAN_BACKSLASH: found a backslash that starts a special command.
1107 : * Any previous data on the line has been transferred to query_buf.
1108 : * The caller will typically next apply a separate flex lexer to scan
1109 : * the special command.
1110 : *
1111 : * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1112 : * incomplete SQL command. *prompt is set to the appropriate prompt type.
1113 : *
1114 : * PSCAN_EOL: the end of the line was reached, and there is no lexical
1115 : * reason to consider the command incomplete. The caller may or may not
1116 : * choose to send it. *prompt is set to the appropriate prompt type if
1117 : * the caller chooses to collect more input.
1118 : *
1119 : * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1120 : * be called next, then the cycle may be repeated with a fresh input line.
1121 : *
1122 : * In all cases, *prompt is set to an appropriate prompt type code for the
1123 : * next line-input operation.
1124 : */
1125 : PsqlScanResult
1126 : psql_scan(PsqlScanState state,
1127 957702 : PQExpBuffer query_buf,
1128 : promptStatus_t *prompt)
1129 : {
1130 : PsqlScanResult result;
1131 : int lexresult;
1132 :
1133 : /* Must be scanning already */
1134 : Assert(state->scanbufhandle != NULL);
1135 :
1136 : /* Set current output target */
1137 : state->output_buf = query_buf;
1138 957702 :
1139 : /* Set input source */
1140 : if (state->buffer_stack != NULL)
1141 957702 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
1142 90 : else
1143 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1144 957612 :
1145 : /* And lex. */
1146 : lexresult = yylex(NULL, state->scanner);
1147 957702 :
1148 : /*
1149 : * Check termination state and return appropriate result info.
1150 : */
1151 : switch (lexresult)
1152 957702 : {
1153 : case LEXRES_EOL: /* end of input */
1154 626546 : switch (state->start_state)
1155 626546 : {
1156 : case INITIAL:
1157 579730 : case xqs: /* we treat this like INITIAL */
1158 : if (state->paren_depth > 0)
1159 579730 : {
1160 : result = PSCAN_INCOMPLETE;
1161 47784 : *prompt = PROMPT_PAREN;
1162 47784 : }
1163 : else if (state->begin_depth > 0)
1164 531946 : {
1165 : result = PSCAN_INCOMPLETE;
1166 452 : *prompt = PROMPT_CONTINUE;
1167 452 : }
1168 : else if (query_buf->len > 0)
1169 531494 : {
1170 : result = PSCAN_EOL;
1171 105890 : *prompt = PROMPT_CONTINUE;
1172 105890 : }
1173 : else
1174 : {
1175 : /* never bother to send an empty buffer */
1176 : result = PSCAN_INCOMPLETE;
1177 425604 : *prompt = PROMPT_READY;
1178 425604 : }
1179 : break;
1180 579730 : case xb:
1181 0 : result = PSCAN_INCOMPLETE;
1182 0 : *prompt = PROMPT_SINGLEQUOTE;
1183 0 : break;
1184 0 : case xc:
1185 794 : result = PSCAN_INCOMPLETE;
1186 794 : *prompt = PROMPT_COMMENT;
1187 794 : break;
1188 794 : case xd:
1189 26 : result = PSCAN_INCOMPLETE;
1190 26 : *prompt = PROMPT_DOUBLEQUOTE;
1191 26 : break;
1192 26 : case xh:
1193 0 : result = PSCAN_INCOMPLETE;
1194 0 : *prompt = PROMPT_SINGLEQUOTE;
1195 0 : break;
1196 0 : case xe:
1197 602 : result = PSCAN_INCOMPLETE;
1198 602 : *prompt = PROMPT_SINGLEQUOTE;
1199 602 : break;
1200 602 : case xq:
1201 9502 : result = PSCAN_INCOMPLETE;
1202 9502 : *prompt = PROMPT_SINGLEQUOTE;
1203 9502 : break;
1204 9502 : case xdolq:
1205 35892 : result = PSCAN_INCOMPLETE;
1206 35892 : *prompt = PROMPT_DOLLARQUOTE;
1207 35892 : break;
1208 35892 : case xui:
1209 0 : result = PSCAN_INCOMPLETE;
1210 0 : *prompt = PROMPT_DOUBLEQUOTE;
1211 0 : break;
1212 0 : case xus:
1213 0 : result = PSCAN_INCOMPLETE;
1214 0 : *prompt = PROMPT_SINGLEQUOTE;
1215 0 : break;
1216 0 : default:
1217 0 : /* can't get here */
1218 : fprintf(stderr, "invalid YY_START\n");
1219 0 : exit(1);
1220 0 : }
1221 : break;
1222 626546 : case LEXRES_SEMI: /* semicolon */
1223 316356 : result = PSCAN_SEMICOLON;
1224 316356 : *prompt = PROMPT_READY;
1225 316356 : break;
1226 316356 : case LEXRES_BACKSLASH: /* backslash */
1227 14800 : result = PSCAN_BACKSLASH;
1228 14800 : *prompt = PROMPT_READY;
1229 14800 : break;
1230 14800 : default:
1231 0 : /* can't get here */
1232 : fprintf(stderr, "invalid yylex result\n");
1233 0 : exit(1);
1234 0 : }
1235 :
1236 : return result;
1237 957702 : }
1238 :
1239 : /*
1240 : * Clean up after scanning a string. This flushes any unread input and
1241 : * releases resources (but not the PsqlScanState itself). Note however
1242 : * that this does not reset the lexer scan state; that can be done by
1243 : * psql_scan_reset(), which is an orthogonal operation.
1244 : *
1245 : * It is legal to call this when not scanning anything (makes it easier
1246 : * to deal with error recovery).
1247 : */
1248 : void
1249 : psql_scan_finish(PsqlScanState state)
1250 644972 : {
1251 : /* Drop any incomplete variable expansions. */
1252 : while (state->buffer_stack != NULL)
1253 644972 : psqlscan_pop_buffer_stack(state);
1254 0 :
1255 : /* Done with the outer scan buffer, too */
1256 : if (state->scanbufhandle)
1257 644972 : yy_delete_buffer(state->scanbufhandle, state->scanner);
1258 626698 : state->scanbufhandle = NULL;
1259 644972 : if (state->scanbuf)
1260 644972 : free(state->scanbuf);
1261 626698 : state->scanbuf = NULL;
1262 644972 : }
1263 644972 :
1264 : /*
1265 : * Reset lexer scanning state to start conditions. This is appropriate
1266 : * for executing \r psql commands (or any other time that we discard the
1267 : * prior contents of query_buf). It is not, however, necessary to do this
1268 : * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1269 : * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1270 : * conditions are returned.
1271 : *
1272 : * Note that this is unrelated to flushing unread input; that task is
1273 : * done by psql_scan_finish().
1274 : */
1275 : void
1276 : psql_scan_reset(PsqlScanState state)
1277 37992 : {
1278 : state->start_state = INITIAL;
1279 37992 : state->paren_depth = 0;
1280 37992 : state->xcdepth = 0; /* not really necessary */
1281 37992 : if (state->dolqstart)
1282 37992 : free(state->dolqstart);
1283 0 : state->dolqstart = NULL;
1284 37992 : state->identifier_count = 0;
1285 37992 : state->begin_depth = 0;
1286 37992 : }
1287 37992 :
1288 : /*
1289 : * Reselect this lexer (psqlscan.l) after using another one.
1290 : *
1291 : * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
1292 : * state, because we'd never switch to another lexer in a different state.
1293 : * However, we don't want to reset e.g. paren_depth, so this can't be
1294 : * the same as psql_scan_reset().
1295 : *
1296 : * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1297 : * must be a superset of this.
1298 : *
1299 : * Note: it seems likely that other lexers could just assign INITIAL for
1300 : * themselves, since that probably has the value zero in every flex-generated
1301 : * lexer. But let's not assume that.
1302 : */
1303 : void
1304 : psql_scan_reselect_sql_lexer(PsqlScanState state)
1305 66388 : {
1306 : state->start_state = INITIAL;
1307 66388 : }
1308 66388 :
1309 : /*
1310 : * Return true if lexer is currently in an "inside quotes" state.
1311 : *
1312 : * This is pretty grotty but is needed to preserve the old behavior
1313 : * that mainloop.c drops blank lines not inside quotes without even
1314 : * echoing them.
1315 : */
1316 : bool
1317 : psql_scan_in_quote(PsqlScanState state)
1318 130406 : {
1319 : return state->start_state != INITIAL &&
1320 131368 : state->start_state != xqs;
1321 962 : }
1322 :
1323 : /*
1324 : * Push the given string onto the stack of stuff to scan.
1325 : *
1326 : * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1327 : */
1328 : void
1329 : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
1330 1214 : const char *varname)
1331 : {
1332 : StackElem *stackelem;
1333 :
1334 : stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1335 1214 :
1336 : /*
1337 : * In current usage, the passed varname points at the current flex input
1338 : * buffer; we must copy it before calling psqlscan_prepare_buffer()
1339 : * because that will change the buffer state.
1340 : */
1341 : stackelem->varname = varname ? pg_strdup(varname) : NULL;
1342 1214 :
1343 : stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
1344 1214 : &stackelem->bufstring);
1345 : state->curline = stackelem->bufstring;
1346 1214 : if (state->safe_encoding)
1347 1214 : {
1348 : stackelem->origstring = NULL;
1349 1214 : state->refline = stackelem->bufstring;
1350 1214 : }
1351 : else
1352 : {
1353 : stackelem->origstring = pg_strdup(newstr);
1354 0 : state->refline = stackelem->origstring;
1355 0 : }
1356 : stackelem->next = state->buffer_stack;
1357 1214 : state->buffer_stack = stackelem;
1358 1214 : }
1359 1214 :
1360 : /*
1361 : * Pop the topmost buffer stack item (there must be one!)
1362 : *
1363 : * NB: after this, the flex input state is unspecified; caller must
1364 : * switch to an appropriate buffer to continue lexing.
1365 : * See psqlscan_select_top_buffer().
1366 : */
1367 : void
1368 : psqlscan_pop_buffer_stack(PsqlScanState state)
1369 1214 : {
1370 : StackElem *stackelem = state->buffer_stack;
1371 1214 :
1372 : state->buffer_stack = stackelem->next;
1373 1214 : yy_delete_buffer(stackelem->buf, state->scanner);
1374 1214 : free(stackelem->bufstring);
1375 1214 : if (stackelem->origstring)
1376 1214 : free(stackelem->origstring);
1377 0 : if (stackelem->varname)
1378 1214 : free(stackelem->varname);
1379 1214 : free(stackelem);
1380 1214 : }
1381 1214 :
1382 : /*
1383 : * Select the topmost surviving buffer as the active input.
1384 : */
1385 : void
1386 : psqlscan_select_top_buffer(PsqlScanState state)
1387 1214 : {
1388 : StackElem *stackelem = state->buffer_stack;
1389 1214 :
1390 : if (stackelem != NULL)
1391 1214 : {
1392 : yy_switch_to_buffer(stackelem->buf, state->scanner);
1393 0 : state->curline = stackelem->bufstring;
1394 0 : state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
1395 0 : }
1396 : else
1397 : {
1398 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1399 1214 : state->curline = state->scanbuf;
1400 1214 : state->refline = state->scanline;
1401 1214 : }
1402 : }
1403 1214 :
1404 : /*
1405 : * Check if specified variable name is the source for any string
1406 : * currently being scanned
1407 : */
1408 : bool
1409 : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
1410 1214 : {
1411 : StackElem *stackelem;
1412 :
1413 : for (stackelem = state->buffer_stack;
1414 1214 : stackelem != NULL;
1415 : stackelem = stackelem->next)
1416 0 : {
1417 : if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1418 0 : return true;
1419 0 : }
1420 : return false;
1421 1214 : }
1422 :
1423 : /*
1424 : * Set up a flex input buffer to scan the given data. We always make a
1425 : * copy of the data. If working in an unsafe encoding, the copy has
1426 : * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1427 : *
1428 : * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1429 : */
1430 : YY_BUFFER_STATE
1431 : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
1432 628012 : char **txtcopy)
1433 : {
1434 : char *newtxt;
1435 :
1436 : /* Flex wants two \0 characters after the actual data */
1437 : newtxt = pg_malloc(len + 2);
1438 628012 : *txtcopy = newtxt;
1439 628012 : newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1440 628012 :
1441 : if (state->safe_encoding)
1442 628012 : memcpy(newtxt, txt, len);
1443 628012 : else
1444 : {
1445 : /* Gotta do it the hard way */
1446 : int i = 0;
1447 0 :
1448 : while (i < len)
1449 0 : {
1450 : int thislen = PQmblen(txt + i, state->encoding);
1451 0 :
1452 : /* first byte should always be okay... */
1453 : newtxt[i] = txt[i];
1454 0 : i++;
1455 0 : while (--thislen > 0 && i < len)
1456 0 : newtxt[i++] = (char) 0xFF;
1457 0 : }
1458 : }
1459 :
1460 : return yy_scan_buffer(newtxt, len + 2, state->scanner);
1461 628012 : }
1462 :
1463 : /*
1464 : * psqlscan_emit() --- body for ECHO macro
1465 : *
1466 : * NB: this must be used for ALL and ONLY the text copied from the flex
1467 : * input data. If you pass it something that is not part of the yytext
1468 : * string, you are making a mistake. Internally generated text can be
1469 : * appended directly to state->output_buf.
1470 : */
1471 : void
1472 : psqlscan_emit(PsqlScanState state, const char *txt, int len)
1473 7507080 : {
1474 : PQExpBuffer output_buf = state->output_buf;
1475 7507080 :
1476 : if (state->safe_encoding)
1477 7507080 : appendBinaryPQExpBuffer(output_buf, txt, len);
1478 7507080 : else
1479 : {
1480 : /* Gotta do it the hard way */
1481 : const char *reference = state->refline;
1482 0 : int i;
1483 :
1484 : reference += (txt - state->curline);
1485 0 :
1486 : for (i = 0; i < len; i++)
1487 0 : {
1488 : char ch = txt[i];
1489 0 :
1490 : if (ch == (char) 0xFF)
1491 0 : ch = reference[i];
1492 0 : appendPQExpBufferChar(output_buf, ch);
1493 0 : }
1494 : }
1495 : }
1496 7507080 :
1497 : /*
1498 : * psqlscan_extract_substring --- fetch value of (part of) the current token
1499 : *
1500 : * This is like psqlscan_emit(), except that the data is returned as a
1501 : * malloc'd string rather than being pushed directly to state->output_buf.
1502 : */
1503 : char *
1504 : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
1505 4826 : {
1506 : char *result = (char *) pg_malloc(len + 1);
1507 4826 :
1508 : if (state->safe_encoding)
1509 4826 : memcpy(result, txt, len);
1510 4826 : else
1511 : {
1512 : /* Gotta do it the hard way */
1513 : const char *reference = state->refline;
1514 0 : int i;
1515 :
1516 : reference += (txt - state->curline);
1517 0 :
1518 : for (i = 0; i < len; i++)
1519 0 : {
1520 : char ch = txt[i];
1521 0 :
1522 : if (ch == (char) 0xFF)
1523 0 : ch = reference[i];
1524 0 : result[i] = ch;
1525 0 : }
1526 : }
1527 : result[len] = '\0';
1528 4826 : return result;
1529 4826 : }
1530 :
1531 : /*
1532 : * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1533 : *
1534 : * If the variable name is found, escape its value using the appropriate
1535 : * quoting method and emit the value to output_buf. (Since the result is
1536 : * surely quoted, there is never any reason to rescan it.) If we don't
1537 : * find the variable or escaping fails, emit the token as-is.
1538 : */
1539 : void
1540 : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
1541 870 : PsqlScanQuoteType quote)
1542 : {
1543 : char *varname;
1544 : char *value;
1545 :
1546 : /* Variable lookup. */
1547 : varname = psqlscan_extract_substring(state, txt + 2, len - 3);
1548 870 : if (state->callbacks->get_variable)
1549 870 : value = state->callbacks->get_variable(varname, quote,
1550 870 : state->cb_passthrough);
1551 : else
1552 : value = NULL;
1553 0 : free(varname);
1554 870 :
1555 : if (value)
1556 870 : {
1557 : /* Emit the suitably-escaped value */
1558 : appendPQExpBufferStr(state->output_buf, value);
1559 814 : free(value);
1560 814 : }
1561 : else
1562 : {
1563 : /* Emit original token as-is */
1564 : psqlscan_emit(state, txt, len);
1565 56 : }
1566 : }
1567 870 :
1568 : void
1569 : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
1570 32 : {
1571 : char *varname;
1572 : char *value;
1573 :
1574 : varname = psqlscan_extract_substring(state, txt + 3, len - 4);
1575 32 : if (state->callbacks->get_variable)
1576 32 : value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
1577 32 : state->cb_passthrough);
1578 : else
1579 : value = NULL;
1580 0 : free(varname);
1581 32 :
1582 : if (value != NULL)
1583 32 : {
1584 : psqlscan_emit(state, "TRUE", 4);
1585 14 : free(value);
1586 14 : }
1587 : else
1588 : {
1589 : psqlscan_emit(state, "FALSE", 5);
1590 18 : }
1591 : }
|