Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * psqlscan.l
5 : * lexical scanner for SQL commands
6 : *
7 : * This lexer used to be part of psql, and that heritage is reflected in
8 : * the file name as well as function and typedef names, though it can now
9 : * be used by other frontend programs as well. It's also possible to extend
10 : * this lexer with a compatible add-on lexer to handle program-specific
11 : * backslash commands.
12 : *
13 : * This code is mainly concerned with determining where the end of a SQL
14 : * statement is: we are looking for semicolons that are not within quotes,
15 : * comments, or parentheses. The most reliable way to handle this is to
16 : * borrow the backend's flex lexer rules, lock, stock, and barrel. The rules
17 : * below are (except for a few) the same as the backend's, but their actions
18 : * are just ECHO whereas the backend's actions generally do other things.
19 : *
20 : * XXX The rules in this file must be kept in sync with the backend lexer!!!
21 : *
22 : * XXX Avoid creating backtracking cases --- see the backend lexer for info.
23 : *
24 : * See psqlscan_int.h for additional commentary.
25 : *
26 : *
27 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
28 : * Portions Copyright (c) 1994, Regents of the University of California
29 : *
30 : * IDENTIFICATION
31 : * src/fe_utils/psqlscan.l
32 : *
33 : *-------------------------------------------------------------------------
34 : */
35 : #include "postgres_fe.h"
36 :
37 : #include "common/logging.h"
38 : #include "fe_utils/psqlscan.h"
39 :
40 : #include "libpq-fe.h"
41 : }
42 :
43 : %{
44 :
45 : /* LCOV_EXCL_START */
46 :
47 : #include "fe_utils/psqlscan_int.h"
48 :
49 : /*
50 : * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
51 : * doesn't presently make use of that argument, so just declare it as int.
52 : */
53 : typedef int YYSTYPE;
54 :
55 :
56 : /* Return values from yylex() */
57 : #define LEXRES_EOL 0 /* end of input */
58 : #define LEXRES_SEMI 1 /* command-terminating semicolon found */
59 : #define LEXRES_BACKSLASH 2 /* backslash command start */
60 :
61 :
62 : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
63 :
64 : %}
65 :
66 : %option reentrant
67 : %option bison-bridge
68 : %option 8bit
69 : %option never-interactive
70 : %option nodefault
71 : %option noinput
72 : %option nounput
73 : %option noyywrap
74 : %option warn
75 : %option prefix="psql_yy"
76 :
77 : /*
78 : * Set the type of yyextra; we use it as a pointer back to the containing
79 : * PsqlScanState.
80 : */
81 : %option extra-type="PsqlScanState"
82 :
83 : /*
84 : * All of the following definitions and rules should exactly match
85 : * src/backend/parser/scan.l so far as the flex patterns are concerned.
86 : * The rule bodies are just ECHO as opposed to what the backend does,
87 : * however. (But be sure to duplicate code that affects the lexing process,
88 : * such as BEGIN() and yyless().) Also, psqlscan uses a single <<EOF>> rule
89 : * whereas scan.l has a separate one for each exclusive state.
90 : */
91 :
92 : /*
93 : * OK, here is a short description of lex/flex rules behavior.
94 : * The longest pattern which matches an input string is always chosen.
95 : * For equal-length patterns, the first occurring in the rules list is chosen.
96 : * INITIAL is the starting state, to which all non-conditional rules apply.
97 : * Exclusive states change parsing rules while the state is active. When in
98 : * an exclusive state, only those rules defined for that state apply.
99 : *
100 : * We use exclusive states for quoted strings, extended comments,
101 : * and to eliminate parsing troubles for numeric strings.
102 : * Exclusive states:
103 : * <xb> bit string literal
104 : * <xc> extended C-style comments
105 : * <xd> delimited identifiers (double-quoted identifiers)
106 : * <xh> hexadecimal byte string
107 : * <xq> standard quoted strings
108 : * <xqs> quote stop (detect continued strings)
109 : * <xe> extended quoted strings (support backslash escape sequences)
110 : * <xdolq> $foo$ quoted strings
111 : * <xui> quoted identifier with Unicode escapes
112 : * <xus> quoted string with Unicode escapes
113 : *
114 : * Note: we intentionally don't mimic the backend's <xeu> state; we have
115 : * no need to distinguish it from <xe> state, and no good way to get out
116 : * of it in error cases. The backend just throws yyerror() in those
117 : * cases, but that's not an option here.
118 : */
119 :
120 : %x xb
121 : %x xc
122 : %x xd
123 : %x xh
124 : %x xq
125 : %x xqs
126 : %x xe
127 : %x xdolq
128 : %x xui
129 : %x xus
130 :
131 : /*
132 : * In order to make the world safe for Windows and Mac clients as well as
133 : * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
134 : * sequence will be seen as two successive newlines, but that doesn't cause
135 : * any problems. Comments that start with -- and extend to the next
136 : * newline are treated as equivalent to a single whitespace character.
137 : *
138 : * NOTE a fine point: if there is no newline following --, we will absorb
139 : * everything to the end of the input as a comment. This is correct. Older
140 : * versions of Postgres failed to recognize -- as a comment if the input
141 : * did not end with a newline.
142 : *
143 : * non_newline_space tracks all space characters except newlines.
144 : *
145 : * XXX if you change the set of whitespace characters, fix scanner_isspace()
146 : * to agree.
147 : */
148 :
149 : space [ \t\n\r\f\v]
150 : non_newline_space [ \t\f\v]
151 : newline [\n\r]
152 : non_newline [^\n\r]
153 :
154 : comment ("--"{non_newline}*)
155 :
156 : whitespace ({space}+|{comment})
157 :
158 : /*
159 : * SQL requires at least one newline in the whitespace separating
160 : * string literals that are to be concatenated. Silly, but who are we
161 : * to argue? Note that {whitespace_with_newline} should not have * after
162 : * it, whereas {whitespace} should generally have a * after it...
163 : */
164 :
165 : special_whitespace ({space}+|{comment}{newline})
166 : non_newline_whitespace ({non_newline_space}|{comment})
167 : whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
168 :
169 : quote '
170 : /* If we see {quote} then {quotecontinue}, the quoted string continues */
171 : quotecontinue {whitespace_with_newline}{quote}
172 :
173 : /*
174 : * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
175 : * {quotecontinue}. It might seem that this could just be {whitespace}*,
176 : * but if there's a dash after {whitespace_with_newline}, it must be consumed
177 : * to see if there's another dash --- which would start a {comment} and thus
178 : * allow continuation of the {quotecontinue} token.
179 : */
180 : quotecontinuefail {whitespace}*"-"?
181 :
182 : /* Bit string
183 : * It is tempting to scan the string for only those characters
184 : * which are allowed. However, this leads to silently swallowed
185 : * characters if illegal characters are included in the string.
186 : * For example, if xbinside is [01] then B'ABCD' is interpreted
187 : * as a zero-length string, and the ABCD' is lost!
188 : * Better to pass the string forward and let the input routines
189 : * validate the contents.
190 : */
191 : xbstart [bB]{quote}
192 : xbinside [^']*
193 :
194 : /* Hexadecimal byte string */
195 : xhstart [xX]{quote}
196 : xhinside [^']*
197 :
198 : /* National character */
199 : xnstart [nN]{quote}
200 :
201 : /* Quoted string that allows backslash escapes */
202 : xestart [eE]{quote}
203 : xeinside [^\\']+
204 : xeescape [\\][^0-7]
205 : xeoctesc [\\][0-7]{1,3}
206 : xehexesc [\\]x[0-9A-Fa-f]{1,2}
207 : xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
208 : xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
209 :
210 : /* Extended quote
211 : * xqdouble implements embedded quote, ''''
212 : */
213 : xqstart {quote}
214 : xqdouble {quote}{quote}
215 : xqinside [^']+
216 :
217 : /* $foo$ style quotes ("dollar quoting")
218 : * The quoted string starts with $foo$ where "foo" is an optional string
219 : * in the form of an identifier, except that it may not contain "$",
220 : * and extends to the first occurrence of an identical string.
221 : * There is *no* processing of the quoted text.
222 : *
223 : * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
224 : * fails to match its trailing "$".
225 : */
226 : dolq_start [A-Za-z\200-\377_]
227 : dolq_cont [A-Za-z\200-\377_0-9]
228 : dolqdelim \$({dolq_start}{dolq_cont}*)?\$
229 : dolqfailed \${dolq_start}{dolq_cont}*
230 : dolqinside [^$]+
231 :
232 : /* Double quote
233 : * Allows embedded spaces and other special characters into identifiers.
234 : */
235 : dquote \"
236 : xdstart {dquote}
237 : xdstop {dquote}
238 : xddouble {dquote}{dquote}
239 : xdinside [^"]+
240 :
241 : /* Quoted identifier with Unicode escapes */
242 : xuistart [uU]&{dquote}
243 :
244 : /* Quoted string with Unicode escapes */
245 : xusstart [uU]&{quote}
246 :
247 : /* error rule to avoid backup */
248 : xufailed [uU]&
249 :
250 :
251 : /* C-style comments
252 : *
253 : * The "extended comment" syntax closely resembles allowable operator syntax.
254 : * The tricky part here is to get lex to recognize a string starting with
255 : * slash-star as a comment, when interpreting it as an operator would produce
256 : * a longer match --- remember lex will prefer a longer match! Also, if we
257 : * have something like plus-slash-star, lex will think this is a 3-character
258 : * operator whereas we want to see it as a + operator and a comment start.
259 : * The solution is two-fold:
260 : * 1. append {op_chars}* to xcstart so that it matches as much text as
261 : * {operator} would. Then the tie-breaker (first matching rule of same
262 : * length) ensures xcstart wins. We put back the extra stuff with yyless()
263 : * in case it contains a star-slash that should terminate the comment.
264 : * 2. In the operator rule, check for slash-star within the operator, and
265 : * if found throw it back with yyless(). This handles the plus-slash-star
266 : * problem.
267 : * Dash-dash comments have similar interactions with the operator rule.
268 : */
269 : xcstart \/\*{op_chars}*
270 : xcstop \*+\/
271 : xcinside [^*/]+
272 :
273 : ident_start [A-Za-z\200-\377_]
274 : ident_cont [A-Za-z\200-\377_0-9\$]
275 :
276 : identifier {ident_start}{ident_cont}*
277 :
278 : /* Assorted special-case operators and operator-like tokens */
279 : typecast "::"
280 : dot_dot \.\.
281 : colon_equals ":="
282 :
283 : /*
284 : * These operator-like tokens (unlike the above ones) also match the {operator}
285 : * rule, which means that they might be overridden by a longer match if they
286 : * are followed by a comment start or a + or - character. Accordingly, if you
287 : * add to this list, you must also add corresponding code to the {operator}
288 : * block to return the correct token in such cases. (This is not needed in
289 : * psqlscan.l since the token value is ignored there.)
290 : */
291 : equals_greater "=>"
292 : less_equals "<="
293 : greater_equals ">="
294 : less_greater "<>"
295 : not_equals "!="
296 :
297 : /*
298 : * "self" is the set of chars that should be returned as single-character
299 : * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
300 : * which can be one or more characters long (but if a single-char token
301 : * appears in the "self" set, it is not to be returned as an Op). Note
302 : * that the sets overlap, but each has some chars that are not in the other.
303 : *
304 : * If you change either set, adjust the character lists appearing in the
305 : * rule for "operator"!
306 : */
307 : self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
308 : op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
309 : operator {op_chars}+
310 :
311 : /*
312 : * Numbers
313 : *
314 : * Unary minus is not part of a number here. Instead we pass it separately to
315 : * the parser, and there it gets coerced via doNegate().
316 : *
317 : * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
318 : *
319 : * {realfail} is added to prevent the need for scanner
320 : * backup when the {real} rule fails to match completely.
321 : */
322 : decdigit [0-9]
323 : hexdigit [0-9A-Fa-f]
324 : octdigit [0-7]
325 : bindigit [0-1]
326 :
327 : decinteger {decdigit}(_?{decdigit})*
328 : hexinteger 0[xX](_?{hexdigit})+
329 : octinteger 0[oO](_?{octdigit})+
330 : bininteger 0[bB](_?{bindigit})+
331 :
332 : hexfail 0[xX]_?
333 : octfail 0[oO]_?
334 : binfail 0[bB]_?
335 :
336 : numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
337 : numericfail {decinteger}\.\.
338 :
339 : real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
340 : realfail ({decinteger}|{numeric})[Ee][-+]
341 :
342 : /* Positional parameters don't accept underscores. */
343 : param \${decdigit}+
344 :
345 : /*
346 : * An identifier immediately following an integer literal is disallowed because
347 : * in some cases it's ambiguous what is meant: for example, 0x1234 could be
348 : * either a hexinteger or a decinteger "0" and an identifier "x1234". We can
349 : * detect such problems by seeing if integer_junk matches a longer substring
350 : * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
351 : * bininteger). One "junk" pattern is sufficient because
352 : * {decinteger}{identifier} will match all the same strings we'd match with
353 : * {hexinteger}{identifier} etc.
354 : *
355 : * Note that the rule for integer_junk must appear after the ones for
356 : * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
357 : * and integer_junk, and we need hexinteger to be chosen in that case.
358 : *
359 : * Also disallow strings matched by numeric_junk, real_junk and param_junk
360 : * for consistency.
361 : */
362 : integer_junk {decinteger}{identifier}
363 : numeric_junk {numeric}{identifier}
364 : real_junk {real}{identifier}
365 : param_junk \${decdigit}+{identifier}
366 :
367 : /* psql-specific: characters allowed in variable names */
368 : variable_char [A-Za-z\200-\377_0-9]
369 :
370 : other .
371 :
372 : /*
373 : * Dollar quoted strings are totally opaque, and no escaping is done on them.
374 : * Other quoted strings must allow some special characters such as single-quote
375 : * and newline.
376 : * Embedded single-quotes are implemented both in the SQL standard
377 : * style of two adjacent single quotes "''" and in the Postgres/Java style
378 : * of escaped-quote "\'".
379 : * Other embedded escaped characters are matched explicitly and the leading
380 : * backslash is dropped from the string.
381 : * Note that xcstart must appear before operator, as explained above!
382 : * Also whitespace (comment) must appear before operator.
383 : */
384 :
385 : %%
386 :
387 : %{
388 : /* Declare some local variables inside yylex(), for convenience */
389 : PsqlScanState cur_state = yyextra;
390 : PQExpBuffer output_buf = cur_state->output_buf;
391 :
392 : /*
393 : * Force flex into the state indicated by start_state. This has a
394 : * couple of purposes: it lets some of the functions below set a new
395 : * starting state without ugly direct access to flex variables, and it
396 : * allows us to transition from one flex lexer to another so that we
397 : * can lex different parts of the source string using separate lexers.
398 : */
399 : BEGIN(cur_state->start_state);
400 : %}
401 :
402 : {whitespace} {
403 : /*
404 : * Note that the whitespace rule includes both true
405 : * whitespace and single-line ("--" style) comments.
406 : * We suppress whitespace until we have collected some
407 : * non-whitespace data. (This interacts with some
408 : * decisions in MainLoop(); see there for details.)
409 : */
410 : if (output_buf->len > 0)
411 : ECHO;
412 : }
413 :
414 : {xcstart} {
415 : cur_state->xcdepth = 0;
416 : BEGIN(xc);
417 : /* Put back any characters past slash-star; see above */
418 : yyless(2);
419 : ECHO;
420 : }
421 :
422 : <xc>{
423 : {xcstart} {
424 : cur_state->xcdepth++;
425 : /* Put back any characters past slash-star; see above */
426 : yyless(2);
427 : ECHO;
428 : }
429 :
430 : {xcstop} {
431 : if (cur_state->xcdepth <= 0)
432 : BEGIN(INITIAL);
433 : else
434 : cur_state->xcdepth--;
435 : ECHO;
436 : }
437 :
438 : {xcinside} {
439 : ECHO;
440 : }
441 :
442 : {op_chars} {
443 : ECHO;
444 : }
445 :
446 : \*+ {
447 : ECHO;
448 : }
449 : } /* <xc> */
450 :
451 : {xbstart} {
452 : BEGIN(xb);
453 : ECHO;
454 : }
455 : <xh>{xhinside} |
456 : <xb>{xbinside} {
457 : ECHO;
458 : }
459 :
460 : {xhstart} {
461 : /* Hexadecimal bit type.
462 : * At some point we should simply pass the string
463 : * forward to the parser and label it there.
464 : * In the meantime, place a leading "x" on the string
465 : * to mark it for the input routine as a hex string.
466 : */
467 : BEGIN(xh);
468 : ECHO;
469 : }
470 :
471 : {xnstart} {
472 : yyless(1); /* eat only 'n' this time */
473 : ECHO;
474 : }
475 :
476 : {xqstart} {
477 : if (cur_state->std_strings)
478 : BEGIN(xq);
479 : else
480 : BEGIN(xe);
481 : ECHO;
482 : }
483 : {xestart} {
484 : BEGIN(xe);
485 : ECHO;
486 : }
487 : {xusstart} {
488 : BEGIN(xus);
489 : ECHO;
490 : }
491 :
492 : <xb,xh,xq,xe,xus>{quote} {
493 : /*
494 : * When we are scanning a quoted string and see an end
495 : * quote, we must look ahead for a possible continuation.
496 : * If we don't see one, we know the end quote was in fact
497 : * the end of the string. To reduce the lexer table size,
498 : * we use a single "xqs" state to do the lookahead for all
499 : * types of strings.
500 : */
501 : cur_state->state_before_str_stop = YYSTATE;
502 : BEGIN(xqs);
503 : ECHO;
504 : }
505 : <xqs>{quotecontinue} {
506 : /*
507 : * Found a quote continuation, so return to the in-quote
508 : * state and continue scanning the literal. Nothing is
509 : * added to the literal's contents.
510 : */
511 : BEGIN(cur_state->state_before_str_stop);
512 : ECHO;
513 : }
514 : <xqs>{quotecontinuefail} |
515 : <xqs>{other} {
516 : /*
517 : * Failed to see a quote continuation. Throw back
518 : * everything after the end quote, and handle the string
519 : * according to the state we were in previously.
520 : */
521 : yyless(0);
522 : BEGIN(INITIAL);
523 : /* There's nothing to echo ... */
524 : }
525 :
526 : <xq,xe,xus>{xqdouble} {
527 : ECHO;
528 : }
529 : <xq,xus>{xqinside} {
530 : ECHO;
531 : }
532 : <xe>{xeinside} {
533 : ECHO;
534 : }
535 : <xe>{xeunicode} {
536 : ECHO;
537 : }
538 : <xe>{xeunicodefail} {
539 : ECHO;
540 : }
541 : <xe>{xeescape} {
542 : ECHO;
543 : }
544 : <xe>{xeoctesc} {
545 : ECHO;
546 : }
547 : <xe>{xehexesc} {
548 : ECHO;
549 : }
550 : <xe>. {
551 : /* This is only needed for \ just before EOF */
552 : ECHO;
553 : }
554 :
555 : {dolqdelim} {
556 : cur_state->dolqstart = pg_strdup(yytext);
557 : BEGIN(xdolq);
558 : ECHO;
559 : }
560 : {dolqfailed} {
561 : /* throw back all but the initial "$" */
562 : yyless(1);
563 : ECHO;
564 : }
<xdolq>{dolqdelim}	{
					/*
					 * A $tag$ seen inside a dollar-quoted body closes the
					 * string only if it is identical to the opening delimiter
					 * saved in dolqstart.
					 */
					if (strcmp(yytext, cur_state->dolqstart) != 0)
					{
						/*
						 * Not our delimiter.  Emit the "$tag" part but hand
						 * the trailing "$" back to the scanner, since it may
						 * begin the real closing delimiter (think of
						 * $delim$...$junk$delim$).
						 */
						yyless(yyleng - 1);
					}
					else
					{
						/* Matching delimiter: the literal is complete */
						free(cur_state->dolqstart);
						cur_state->dolqstart = NULL;
						BEGIN(INITIAL);
					}
					ECHO;
				}
583 : <xdolq>{dolqinside} {
584 : ECHO;
585 : }
586 : <xdolq>{dolqfailed} {
587 : ECHO;
588 : }
589 : <xdolq>. {
590 : /* This is only needed for $ inside the quoted text */
591 : ECHO;
592 : }
593 :
594 : {xdstart} {
595 : BEGIN(xd);
596 : ECHO;
597 : }
598 : {xuistart} {
599 : BEGIN(xui);
600 : ECHO;
601 : }
602 : <xd>{xdstop} {
603 : BEGIN(INITIAL);
604 : ECHO;
605 : }
606 : <xui>{dquote} {
607 : BEGIN(INITIAL);
608 : ECHO;
609 : }
610 : <xd,xui>{xddouble} {
611 : ECHO;
612 : }
613 : <xd,xui>{xdinside} {
614 : ECHO;
615 : }
616 :
617 : {xufailed} {
618 : /* throw back all but the initial u/U */
619 : yyless(1);
620 : ECHO;
621 : }
622 :
623 : {typecast} {
624 : ECHO;
625 : }
626 :
627 : {dot_dot} {
628 : ECHO;
629 : }
630 :
631 : {colon_equals} {
632 : ECHO;
633 : }
634 :
635 : {equals_greater} {
636 : ECHO;
637 : }
638 :
639 : {less_equals} {
640 : ECHO;
641 : }
642 :
643 : {greater_equals} {
644 : ECHO;
645 : }
646 :
647 : {less_greater} {
648 : ECHO;
649 : }
650 :
651 : {not_equals} {
652 : ECHO;
653 : }
654 :
655 : /*
656 : * These rules are specific to psql --- they implement parenthesis
657 : * counting and detection of command-ending semicolon. These must
658 : * appear before the {self} rule so that they take precedence over it.
659 : */
660 :
661 : "(" {
662 : cur_state->paren_depth++;
663 : ECHO;
664 : }
665 :
666 : ")" {
667 : if (cur_state->paren_depth > 0)
668 : cur_state->paren_depth--;
669 : ECHO;
670 : }
671 :
";"				{
					bool		stmt_done;

					ECHO;

					/*
					 * A semicolon ends the command only when we are not
					 * nested inside parentheses or a BEGIN ... END block.
					 */
					stmt_done = (cur_state->paren_depth == 0 &&
								 cur_state->begin_depth == 0);
					if (stmt_done)
					{
						/* Next statement starts its identifier tracking afresh */
						cur_state->identifier_count = 0;
						/* Save the flex state so lexing can resume later */
						cur_state->start_state = YY_START;
						return LEXRES_SEMI;
					}
				}
682 :
683 : /*
684 : * psql-specific rules to handle backslash commands and variable
685 : * substitution. We want these before {self}, also.
686 : */
687 :
688 : "\\"[;:] {
689 : /* Force a semi-colon or colon into the query buffer */
690 : psqlscan_emit(cur_state, yytext + 1, 1);
691 : if (yytext[1] == ';')
692 : cur_state->identifier_count = 0;
693 : }
694 :
695 : "\\" {
696 : /* Terminate lexing temporarily */
697 : cur_state->start_state = YY_START;
698 : return LEXRES_BACKSLASH;
699 : }
700 :
:{variable_char}+	{
					/*
					 * Candidate psql variable reference.  The text after the
					 * colon is the variable name; ask the application (if it
					 * supplied a lookup callback) for the value.
					 */
					char	   *name = psqlscan_extract_substring(cur_state,
																  yytext + 1,
																  yyleng - 1);
					char	   *val = NULL;

					if (cur_state->callbacks->get_variable)
						val = cur_state->callbacks->get_variable(name,
																 PQUOTE_PLAIN,
																 cur_state->cb_passthrough);

					if (val == NULL)
					{
						/* No such variable: pass the text through untouched */
						ECHO;
					}
					else if (psqlscan_var_is_current_source(cur_state, name))
					{
						/* Expanding it again would recurse; copy as-is instead */
						pg_log_warning("skipping recursive expansion of variable \"%s\"",
									   name);
						ECHO;
						free(val);
					}
					else
					{
						/*
						 * Substitute: lex the value as a nested input buffer
						 * (yy_scan_string already made buffer active).
						 */
						psqlscan_push_new_buffer(cur_state, val, name);
						free(val);
					}

					free(name);
				}
746 :
747 : :'{variable_char}+' {
748 : psqlscan_escape_variable(cur_state, yytext, yyleng,
749 : PQUOTE_SQL_LITERAL);
750 : }
751 :
752 : :\"{variable_char}+\" {
753 : psqlscan_escape_variable(cur_state, yytext, yyleng,
754 : PQUOTE_SQL_IDENT);
755 : }
756 :
757 : :\{\?{variable_char}+\} {
758 : psqlscan_test_variable(cur_state, yytext, yyleng);
759 : }
760 :
761 : /*
762 : * These rules just avoid the need for scanner backup if one of the
763 : * three rules above fails to match completely.
764 : */
765 :
766 : :'{variable_char}* {
767 : /* Throw back everything but the colon */
768 : yyless(1);
769 : ECHO;
770 : }
771 :
772 : :\"{variable_char}* {
773 : /* Throw back everything but the colon */
774 : yyless(1);
775 : ECHO;
776 : }
777 :
778 : :\{\?{variable_char}* {
779 : /* Throw back everything but the colon */
780 : yyless(1);
781 : ECHO;
782 : }
783 : :\{ {
784 : /* Throw back everything but the colon */
785 : yyless(1);
786 : ECHO;
787 : }
788 :
789 : /*
790 : * Back to backend-compatible rules.
791 : */
792 :
793 : {self} {
794 : ECHO;
795 : }
796 :
{operator}		{
					/*
					 * An embedded slash-star or dash-dash begins a comment,
					 * so the operator has to end just before it.  (A comment
					 * start at the very first character is matched by an
					 * earlier rule and never reaches this one.)
					 */
					int			keep = yyleng;
					char	   *cstart = strstr(yytext, "/*");
					char	   *lstart = strstr(yytext, "--");
					char	   *cut;

					/* Pick whichever comment start comes first, if any */
					if (cstart == NULL)
						cut = lstart;
					else if (lstart == NULL || cstart < lstart)
						cut = cstart;
					else
						cut = lstart;

					if (cut != NULL)
						keep = cut - yytext;

					/*
					 * SQL compatibility: a multi-character operator may end
					 * in '+' or '-' only if it also contains a character that
					 * cannot occur in a standard SQL operator.  So '=-' is
					 * split into two operators while '?-' stays whole.
					 */
					if (keep > 1 &&
						(yytext[keep - 1] == '+' ||
						 yytext[keep - 1] == '-'))
					{
						/*
						 * Index of the first non-SQL operator character; it
						 * qualifies only if it lies before the final char.
						 */
						int			first_nonsql = (int) strcspn(yytext, "~!@#^&|`?%");

						if (first_nonsql >= keep - 1)
						{
							/* None found: drop every trailing '+' / '-' */
							keep--;
							while (keep > 1 &&
								   (yytext[keep - 1] == '+' ||
									yytext[keep - 1] == '-'))
								keep--;
						}
					}

					/* Return any surplus characters to the input */
					if (keep < yyleng)
						yyless(keep);
					ECHO;
				}
863 :
864 : {param} {
865 : ECHO;
866 : }
867 : {param_junk} {
868 : ECHO;
869 : }
870 :
871 : {decinteger} {
872 : ECHO;
873 : }
874 : {hexinteger} {
875 : ECHO;
876 : }
877 : {octinteger} {
878 : ECHO;
879 : }
880 : {bininteger} {
881 : ECHO;
882 : }
883 : {hexfail} {
884 : ECHO;
885 : }
886 : {octfail} {
887 : ECHO;
888 : }
889 : {binfail} {
890 : ECHO;
891 : }
892 : {numeric} {
893 : ECHO;
894 : }
895 : {numericfail} {
896 : /* throw back the .., and treat as integer */
897 : yyless(yyleng - 2);
898 : ECHO;
899 : }
900 : {real} {
901 : ECHO;
902 : }
903 : {realfail} {
904 : ECHO;
905 : }
906 : {integer_junk} {
907 : ECHO;
908 : }
909 : {numeric_junk} {
910 : ECHO;
911 : }
912 : {real_junk} {
913 : ECHO;
914 : }
915 :
916 :
{identifier}	{
					/*
					 * Heuristic tracking of BEGIN ... END blocks inside a
					 * function or procedure body, so that semicolons within
					 * such a block do not end the statement.  We record the
					 * first letter of each leading keyword to recognize a
					 * statement opening with
					 * CREATE [OR REPLACE] {FUNCTION|PROCEDURE}.
					 */
					const bool	is_prefix_kw =
						pg_strcasecmp(yytext, "create") == 0 ||
						pg_strcasecmp(yytext, "function") == 0 ||
						pg_strcasecmp(yytext, "procedure") == 0 ||
						pg_strcasecmp(yytext, "or") == 0 ||
						pg_strcasecmp(yytext, "replace") == 0;

					/* First identifier of a statement: forget the old prefix */
					if (cur_state->identifier_count == 0)
						memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));

					/* Only slots within the identifiers array are recorded */
					if (is_prefix_kw &&
						cur_state->identifier_count < sizeof(cur_state->identifiers))
						cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);

					cur_state->identifier_count++;

					if (cur_state->paren_depth == 0 &&
						cur_state->identifiers[0] == 'c' &&
						(cur_state->identifiers[1] == 'f' ||
						 cur_state->identifiers[1] == 'p' ||
						 (cur_state->identifiers[1] == 'o' &&
						  cur_state->identifiers[2] == 'r' &&
						  (cur_state->identifiers[3] == 'f' ||
						   cur_state->identifiers[3] == 'p'))))
					{
						/*
						 * BEGIN always opens a block.  CASE is also closed
						 * by END, but only matters once inside a BEGIN.
						 */
						if (pg_strcasecmp(yytext, "begin") == 0 ||
							(pg_strcasecmp(yytext, "case") == 0 &&
							 cur_state->begin_depth >= 1))
							cur_state->begin_depth++;
						else if (pg_strcasecmp(yytext, "end") == 0 &&
								 cur_state->begin_depth > 0)
							cur_state->begin_depth--;
					}

					ECHO;
				}
969 :
970 : {other} {
971 : ECHO;
972 : }
973 :
<<EOF>>			{
					if (cur_state->buffer_stack != NULL)
					{
						/*
						 * This was the end of a variable expansion, not of
						 * the real input: drop the finished buffer, switch
						 * to the one beneath it, and carry on lexing.
						 */
						psqlscan_pop_buffer_stack(cur_state);
						psqlscan_select_top_buffer(cur_state);
					}
					else
					{
						/* Genuine end of input; save flex state and stop */
						cur_state->start_state = YY_START;
						return LEXRES_EOL;
					}
				}
988 :
989 : %%
990 :
991 : /* LCOV_EXCL_STOP */
992 :
993 : /*
994 : * Create a lexer working state struct.
995 : *
996 : * callbacks is a struct of function pointers that encapsulate some
997 : * behavior we need from the surrounding program. This struct must
998 : * remain valid for the lifespan of the PsqlScanState.
999 : */
1000 : PsqlScanState
1001 : psql_scan_create(const PsqlScanCallbacks *callbacks)
1002 18728 : {
1003 : PsqlScanState state;
1004 :
1005 : state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
1006 18728 :
1007 : state->callbacks = callbacks;
1008 18728 :
1009 : yylex_init(&state->scanner);
1010 18728 :
1011 : yyset_extra(state, state->scanner);
1012 18728 :
1013 : psql_scan_reset(state);
1014 18728 :
1015 : return state;
1016 18728 : }
1017 :
1018 : /*
1019 : * Destroy a lexer working state struct, releasing all resources.
1020 : */
1021 : void
1022 : psql_scan_destroy(PsqlScanState state)
1023 18626 : {
1024 : psql_scan_finish(state);
1025 18626 :
1026 : psql_scan_reset(state);
1027 18626 :
1028 : yylex_destroy(state->scanner);
1029 18626 :
1030 : free(state);
1031 18626 : }
1032 18626 :
1033 : /*
1034 : * Set the callback passthrough pointer for the lexer.
1035 : *
1036 : * This could have been integrated into psql_scan_create, but keeping it
1037 : * separate allows the application to change the pointer later, which might
1038 : * be useful.
1039 : */
1040 : void
1041 : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
1042 18166 : {
1043 : state->cb_passthrough = passthrough;
1044 18166 : }
1045 18166 :
1046 : /*
1047 : * Set up to perform lexing of the given input line.
1048 : *
1049 : * The text at *line, extending for line_len bytes, will be scanned by
1050 : * subsequent calls to the psql_scan routines. psql_scan_finish should
1051 : * be called when scanning is complete. Note that the lexer retains
1052 : * a pointer to the storage at *line --- this string must not be altered
1053 : * or freed until after psql_scan_finish is called.
1054 : *
1055 : * encoding is the libpq identifier for the character encoding in use,
1056 : * and std_strings says whether standard_conforming_strings is on.
1057 : */
1058 : void
1059 : psql_scan_setup(PsqlScanState state,
1060 634666 : const char *line, int line_len,
1061 : int encoding, bool std_strings)
1062 : {
1063 : /* Mustn't be scanning already */
1064 : Assert(state->scanbufhandle == NULL);
1065 : Assert(state->buffer_stack == NULL);
1066 :
1067 : /* Do we need to hack the character set encoding? */
1068 : state->encoding = encoding;
1069 634666 : state->safe_encoding = pg_valid_server_encoding_id(encoding);
1070 634666 :
1071 : /* Save standard-strings flag as well */
1072 : state->std_strings = std_strings;
1073 634666 :
1074 : /* Set up flex input buffer with appropriate translation and padding */
1075 : state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
1076 634666 : &state->scanbuf);
1077 : state->scanline = line;
1078 634666 :
1079 : /* Set lookaside data in case we have to map unsafe encoding */
1080 : state->curline = state->scanbuf;
1081 634666 : state->refline = state->scanline;
1082 634666 : }
1083 634666 :
1084 : /*
1085 : * Do lexical analysis of SQL command text.
1086 : *
1087 : * The text previously passed to psql_scan_setup is scanned, and appended
1088 : * (possibly with transformation) to query_buf.
1089 : *
1090 : * The return value indicates the condition that stopped scanning:
1091 : *
1092 : * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
1093 : * transferred to query_buf.) The command accumulated in query_buf should
1094 : * be executed, then clear query_buf and call again to scan the remainder
1095 : * of the line.
1096 : *
1097 : * PSCAN_BACKSLASH: found a backslash that starts a special command.
1098 : * Any previous data on the line has been transferred to query_buf.
1099 : * The caller will typically next apply a separate flex lexer to scan
1100 : * the special command.
1101 : *
1102 : * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1103 : * incomplete SQL command. *prompt is set to the appropriate prompt type.
1104 : *
1105 : * PSCAN_EOL: the end of the line was reached, and there is no lexical
1106 : * reason to consider the command incomplete. The caller may or may not
1107 : * choose to send it. *prompt is set to the appropriate prompt type if
1108 : * the caller chooses to collect more input.
1109 : *
1110 : * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1111 : * be called next, then the cycle may be repeated with a fresh input line.
1112 : *
1113 : * In all cases, *prompt is set to an appropriate prompt type code for the
1114 : * next line-input operation.
1115 : */
1116 : PsqlScanResult
1117 : psql_scan(PsqlScanState state,
1118 969102 : PQExpBuffer query_buf,
1119 : promptStatus_t *prompt)
1120 : {
1121 : PsqlScanResult result;
1122 : int lexresult;
1123 :
1124 : /* Must be scanning already */
1125 : Assert(state->scanbufhandle != NULL);
1126 :
1127 : /* Set current output target */
1128 : state->output_buf = query_buf;
1129 969102 :
1130 : /* Set input source */
1131 : if (state->buffer_stack != NULL)
1132 969102 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
1133 90 : else
1134 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1135 969012 :
1136 : /* And lex. */
1137 : lexresult = yylex(NULL, state->scanner);
1138 969102 :
1139 : /*
1140 : * Check termination state and return appropriate result info.
1141 : */
1142 : switch (lexresult)
1143 969102 : {
1144 : case LEXRES_EOL: /* end of input */
1145 634408 : switch (state->start_state)
1146 634408 : {
1147 : case INITIAL:
1148 587434 : case xqs: /* we treat this like INITIAL */
1149 : if (state->paren_depth > 0)
1150 587434 : {
1151 : result = PSCAN_INCOMPLETE;
1152 48256 : *prompt = PROMPT_PAREN;
1153 48256 : }
1154 : else if (state->begin_depth > 0)
1155 539178 : {
1156 : result = PSCAN_INCOMPLETE;
1157 518 : *prompt = PROMPT_CONTINUE;
1158 518 : }
1159 : else if (query_buf->len > 0)
1160 538660 : {
1161 : result = PSCAN_EOL;
1162 108264 : *prompt = PROMPT_CONTINUE;
1163 108264 : }
1164 : else
1165 : {
1166 : /* never bother to send an empty buffer */
1167 : result = PSCAN_INCOMPLETE;
1168 430396 : *prompt = PROMPT_READY;
1169 430396 : }
1170 : break;
1171 587434 : case xb:
1172 0 : result = PSCAN_INCOMPLETE;
1173 0 : *prompt = PROMPT_SINGLEQUOTE;
1174 0 : break;
1175 0 : case xc:
1176 794 : result = PSCAN_INCOMPLETE;
1177 794 : *prompt = PROMPT_COMMENT;
1178 794 : break;
1179 794 : case xd:
1180 26 : result = PSCAN_INCOMPLETE;
1181 26 : *prompt = PROMPT_DOUBLEQUOTE;
1182 26 : break;
1183 26 : case xh:
1184 0 : result = PSCAN_INCOMPLETE;
1185 0 : *prompt = PROMPT_SINGLEQUOTE;
1186 0 : break;
1187 0 : case xe:
1188 602 : result = PSCAN_INCOMPLETE;
1189 602 : *prompt = PROMPT_SINGLEQUOTE;
1190 602 : break;
1191 602 : case xq:
1192 9502 : result = PSCAN_INCOMPLETE;
1193 9502 : *prompt = PROMPT_SINGLEQUOTE;
1194 9502 : break;
1195 9502 : case xdolq:
1196 36050 : result = PSCAN_INCOMPLETE;
1197 36050 : *prompt = PROMPT_DOLLARQUOTE;
1198 36050 : break;
1199 36050 : case xui:
1200 0 : result = PSCAN_INCOMPLETE;
1201 0 : *prompt = PROMPT_DOUBLEQUOTE;
1202 0 : break;
1203 0 : case xus:
1204 0 : result = PSCAN_INCOMPLETE;
1205 0 : *prompt = PROMPT_SINGLEQUOTE;
1206 0 : break;
1207 0 : default:
1208 0 : /* can't get here */
1209 : fprintf(stderr, "invalid YY_START\n");
1210 0 : exit(1);
1211 0 : }
1212 : break;
1213 634408 : case LEXRES_SEMI: /* semicolon */
1214 319628 : result = PSCAN_SEMICOLON;
1215 319628 : *prompt = PROMPT_READY;
1216 319628 : break;
1217 319628 : case LEXRES_BACKSLASH: /* backslash */
1218 15066 : result = PSCAN_BACKSLASH;
1219 15066 : *prompt = PROMPT_READY;
1220 15066 : break;
1221 15066 : default:
1222 0 : /* can't get here */
1223 : fprintf(stderr, "invalid yylex result\n");
1224 0 : exit(1);
1225 0 : }
1226 :
1227 : return result;
1228 969102 : }
1229 :
1230 : /*
1231 : * Clean up after scanning a string. This flushes any unread input and
1232 : * releases resources (but not the PsqlScanState itself). Note however
1233 : * that this does not reset the lexer scan state; that can be done by
1234 : * psql_scan_reset(), which is an orthogonal operation.
1235 : *
1236 : * It is legal to call this when not scanning anything (makes it easier
1237 : * to deal with error recovery).
1238 : */
1239 : void
1240 : psql_scan_finish(PsqlScanState state)
1241 653188 : {
1242 : /* Drop any incomplete variable expansions. */
1243 : while (state->buffer_stack != NULL)
1244 653188 : psqlscan_pop_buffer_stack(state);
1245 0 :
1246 : /* Done with the outer scan buffer, too */
1247 : if (state->scanbufhandle)
1248 653188 : yy_delete_buffer(state->scanbufhandle, state->scanner);
1249 634566 : state->scanbufhandle = NULL;
1250 653188 : if (state->scanbuf)
1251 653188 : free(state->scanbuf);
1252 634566 : state->scanbuf = NULL;
1253 653188 : }
1254 653188 :
1255 : /*
1256 : * Reset lexer scanning state to start conditions. This is appropriate
1257 : * for executing \r psql commands (or any other time that we discard the
1258 : * prior contents of query_buf). It is not, however, necessary to do this
1259 : * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1260 : * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1261 : * conditions are returned.
1262 : *
1263 : * Note that this is unrelated to flushing unread input; that task is
1264 : * done by psql_scan_finish().
1265 : */
1266 : void
1267 : psql_scan_reset(PsqlScanState state)
1268 38742 : {
1269 : state->start_state = INITIAL;
1270 38742 : state->paren_depth = 0;
1271 38742 : state->xcdepth = 0; /* not really necessary */
1272 38742 : if (state->dolqstart)
1273 38742 : free(state->dolqstart);
1274 0 : state->dolqstart = NULL;
1275 38742 : state->identifier_count = 0;
1276 38742 : state->begin_depth = 0;
1277 38742 : }
1278 38742 :
1279 : /*
1280 : * Reselect this lexer (psqlscan.l) after using another one.
1281 : *
1282 : * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
1283 : * state, because we'd never switch to another lexer in a different state.
1284 : * However, we don't want to reset e.g. paren_depth, so this can't be
1285 : * the same as psql_scan_reset().
1286 : *
1287 : * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1288 : * must be a superset of this.
1289 : *
1290 : * Note: it seems likely that other lexers could just assign INITIAL for
1291 : * themselves, since that probably has the value zero in every flex-generated
1292 : * lexer. But let's not assume that.
1293 : */
1294 : void
1295 : psql_scan_reselect_sql_lexer(PsqlScanState state)
1296 67388 : {
1297 : state->start_state = INITIAL;
1298 67388 : }
1299 67388 :
1300 : /*
1301 : * Return true if lexer is currently in an "inside quotes" state.
1302 : *
1303 : * This is pretty grotty but is needed to preserve the old behavior
1304 : * that mainloop.c drops blank lines not inside quotes without even
1305 : * echoing them.
1306 : */
1307 : bool
1308 : psql_scan_in_quote(PsqlScanState state)
1309 131768 : {
1310 : return state->start_state != INITIAL &&
1311 132736 : state->start_state != xqs;
1312 968 : }
1313 :
1314 : /*
1315 : * Push the given string onto the stack of stuff to scan.
1316 : *
1317 : * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1318 : */
1319 : void
1320 : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
1321 1280 : const char *varname)
1322 : {
1323 : StackElem *stackelem;
1324 :
1325 : stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1326 1280 :
1327 : /*
1328 : * In current usage, the passed varname points at the current flex input
1329 : * buffer; we must copy it before calling psqlscan_prepare_buffer()
1330 : * because that will change the buffer state.
1331 : */
1332 : stackelem->varname = varname ? pg_strdup(varname) : NULL;
1333 1280 :
1334 : stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
1335 1280 : &stackelem->bufstring);
1336 : state->curline = stackelem->bufstring;
1337 1280 : if (state->safe_encoding)
1338 1280 : {
1339 : stackelem->origstring = NULL;
1340 1280 : state->refline = stackelem->bufstring;
1341 1280 : }
1342 : else
1343 : {
1344 : stackelem->origstring = pg_strdup(newstr);
1345 0 : state->refline = stackelem->origstring;
1346 0 : }
1347 : stackelem->next = state->buffer_stack;
1348 1280 : state->buffer_stack = stackelem;
1349 1280 : }
1350 1280 :
1351 : /*
1352 : * Pop the topmost buffer stack item (there must be one!)
1353 : *
1354 : * NB: after this, the flex input state is unspecified; caller must
1355 : * switch to an appropriate buffer to continue lexing.
1356 : * See psqlscan_select_top_buffer().
1357 : */
1358 : void
1359 : psqlscan_pop_buffer_stack(PsqlScanState state)
1360 1280 : {
1361 : StackElem *stackelem = state->buffer_stack;
1362 1280 :
1363 : state->buffer_stack = stackelem->next;
1364 1280 : yy_delete_buffer(stackelem->buf, state->scanner);
1365 1280 : free(stackelem->bufstring);
1366 1280 : if (stackelem->origstring)
1367 1280 : free(stackelem->origstring);
1368 0 : if (stackelem->varname)
1369 1280 : free(stackelem->varname);
1370 1280 : free(stackelem);
1371 1280 : }
1372 1280 :
1373 : /*
1374 : * Select the topmost surviving buffer as the active input.
1375 : */
1376 : void
1377 : psqlscan_select_top_buffer(PsqlScanState state)
1378 1280 : {
1379 : StackElem *stackelem = state->buffer_stack;
1380 1280 :
1381 : if (stackelem != NULL)
1382 1280 : {
1383 : yy_switch_to_buffer(stackelem->buf, state->scanner);
1384 0 : state->curline = stackelem->bufstring;
1385 0 : state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
1386 0 : }
1387 : else
1388 : {
1389 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1390 1280 : state->curline = state->scanbuf;
1391 1280 : state->refline = state->scanline;
1392 1280 : }
1393 : }
1394 1280 :
1395 : /*
1396 : * Check if specified variable name is the source for any string
1397 : * currently being scanned
1398 : */
1399 : bool
1400 : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
1401 1280 : {
1402 : StackElem *stackelem;
1403 :
1404 : for (stackelem = state->buffer_stack;
1405 1280 : stackelem != NULL;
1406 : stackelem = stackelem->next)
1407 0 : {
1408 : if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1409 0 : return true;
1410 0 : }
1411 : return false;
1412 1280 : }
1413 :
1414 : /*
1415 : * Set up a flex input buffer to scan the given data. We always make a
1416 : * copy of the data. If working in an unsafe encoding, the copy has
1417 : * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1418 : *
1419 : * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1420 : */
1421 : YY_BUFFER_STATE
1422 : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
1423 635946 : char **txtcopy)
1424 : {
1425 : char *newtxt;
1426 :
1427 : /* Flex wants two \0 characters after the actual data */
1428 : newtxt = pg_malloc(len + 2);
1429 635946 : *txtcopy = newtxt;
1430 635946 : newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1431 635946 :
1432 : if (state->safe_encoding)
1433 635946 : memcpy(newtxt, txt, len);
1434 635946 : else
1435 : {
1436 : /* Gotta do it the hard way */
1437 : int i = 0;
1438 0 :
1439 : while (i < len)
1440 0 : {
1441 : int thislen = PQmblen(txt + i, state->encoding);
1442 0 :
1443 : /* first byte should always be okay... */
1444 : newtxt[i] = txt[i];
1445 0 : i++;
1446 0 : while (--thislen > 0 && i < len)
1447 0 : newtxt[i++] = (char) 0xFF;
1448 0 : }
1449 : }
1450 :
1451 : return yy_scan_buffer(newtxt, len + 2, state->scanner);
1452 635946 : }
1453 :
1454 : /*
1455 : * psqlscan_emit() --- body for ECHO macro
1456 : *
1457 : * NB: this must be used for ALL and ONLY the text copied from the flex
1458 : * input data. If you pass it something that is not part of the yytext
1459 : * string, you are making a mistake. Internally generated text can be
1460 : * appended directly to state->output_buf.
1461 : */
1462 : void
1463 : psqlscan_emit(PsqlScanState state, const char *txt, int len)
1464 7612168 : {
1465 : PQExpBuffer output_buf = state->output_buf;
1466 7612168 :
1467 : if (state->safe_encoding)
1468 7612168 : appendBinaryPQExpBuffer(output_buf, txt, len);
1469 7612168 : else
1470 : {
1471 : /* Gotta do it the hard way */
1472 : const char *reference = state->refline;
1473 0 : int i;
1474 :
1475 : reference += (txt - state->curline);
1476 0 :
1477 : for (i = 0; i < len; i++)
1478 0 : {
1479 : char ch = txt[i];
1480 0 :
1481 : if (ch == (char) 0xFF)
1482 0 : ch = reference[i];
1483 0 : appendPQExpBufferChar(output_buf, ch);
1484 0 : }
1485 : }
1486 : }
1487 7612168 :
1488 : /*
1489 : * psqlscan_extract_substring --- fetch value of (part of) the current token
1490 : *
1491 : * This is like psqlscan_emit(), except that the data is returned as a
1492 : * malloc'd string rather than being pushed directly to state->output_buf.
1493 : */
1494 : char *
1495 : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
1496 4952 : {
1497 : char *result = (char *) pg_malloc(len + 1);
1498 4952 :
1499 : if (state->safe_encoding)
1500 4952 : memcpy(result, txt, len);
1501 4952 : else
1502 : {
1503 : /* Gotta do it the hard way */
1504 : const char *reference = state->refline;
1505 0 : int i;
1506 :
1507 : reference += (txt - state->curline);
1508 0 :
1509 : for (i = 0; i < len; i++)
1510 0 : {
1511 : char ch = txt[i];
1512 0 :
1513 : if (ch == (char) 0xFF)
1514 0 : ch = reference[i];
1515 0 : result[i] = ch;
1516 0 : }
1517 : }
1518 : result[len] = '\0';
1519 4952 : return result;
1520 4952 : }
1521 :
1522 : /*
1523 : * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1524 : *
1525 : * If the variable name is found, escape its value using the appropriate
1526 : * quoting method and emit the value to output_buf. (Since the result is
1527 : * surely quoted, there is never any reason to rescan it.) If we don't
1528 : * find the variable or escaping fails, emit the token as-is.
1529 : */
1530 : void
1531 : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
1532 918 : PsqlScanQuoteType quote)
1533 : {
1534 : char *varname;
1535 : char *value;
1536 :
1537 : /* Variable lookup. */
1538 : varname = psqlscan_extract_substring(state, txt + 2, len - 3);
1539 918 : if (state->callbacks->get_variable)
1540 918 : value = state->callbacks->get_variable(varname, quote,
1541 918 : state->cb_passthrough);
1542 : else
1543 : value = NULL;
1544 0 : free(varname);
1545 918 :
1546 : if (value)
1547 918 : {
1548 : /* Emit the suitably-escaped value */
1549 : appendPQExpBufferStr(state->output_buf, value);
1550 862 : free(value);
1551 862 : }
1552 : else
1553 : {
1554 : /* Emit original token as-is */
1555 : psqlscan_emit(state, txt, len);
1556 56 : }
1557 : }
1558 918 :
1559 : void
1560 : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
1561 32 : {
1562 : char *varname;
1563 : char *value;
1564 :
1565 : varname = psqlscan_extract_substring(state, txt + 3, len - 4);
1566 32 : if (state->callbacks->get_variable)
1567 32 : value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
1568 32 : state->cb_passthrough);
1569 : else
1570 : value = NULL;
1571 0 : free(varname);
1572 32 :
1573 : if (value != NULL)
1574 32 : {
1575 : psqlscan_emit(state, "TRUE", 4);
1576 14 : free(value);
1577 14 : }
1578 : else
1579 : {
1580 : psqlscan_emit(state, "FALSE", 5);
1581 18 : }
1582 : }
|