Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * psqlscan.l
5 : * lexical scanner for SQL commands
6 : *
7 : * This lexer used to be part of psql, and that heritage is reflected in
8 : * the file name as well as function and typedef names, though it can now
9 : * be used by other frontend programs as well. It's also possible to extend
10 : * this lexer with a compatible add-on lexer to handle program-specific
11 : * backslash commands.
12 : *
13 : * This code is mainly concerned with determining where the end of a SQL
14 : * statement is: we are looking for semicolons that are not within quotes,
15 : * comments, or parentheses. The most reliable way to handle this is to
16 : * borrow the backend's flex lexer rules, lock, stock, and barrel. The rules
17 : * below are (except for a few) the same as the backend's, but their actions
18 : * are just ECHO whereas the backend's actions generally do other things.
19 : *
20 : * XXX The rules in this file must be kept in sync with the backend lexer!!!
21 : *
22 : * XXX Avoid creating backtracking cases --- see the backend lexer for info.
23 : *
24 : * See psqlscan_int.h for additional commentary.
25 : *
26 : *
27 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
28 : * Portions Copyright (c) 1994, Regents of the University of California
29 : *
30 : * IDENTIFICATION
31 : * src/fe_utils/psqlscan.l
32 : *
33 : *-------------------------------------------------------------------------
34 : */
35 : #include "postgres_fe.h"
36 :
37 : #include "common/logging.h"
38 : #include "fe_utils/psqlscan.h"
39 :
40 : #include "libpq-fe.h"
41 : }
42 :
%{

/* LCOV_EXCL_START */

#include "fe_utils/psqlscan_int.h"

/*
 * yylex's first argument must have type YYSTYPE.  Nothing in this lexer
 * looks at that argument at present, so a plain int is sufficient.
 */
typedef int YYSTYPE;


/* Codes that yylex() hands back to its caller */
#define LEXRES_EOL			0	/* end of input */
#define LEXRES_SEMI			1	/* command-terminating semicolon found */
#define LEXRES_BACKSLASH	2	/* backslash command start */


/*
 * Redefine ECHO so that matched text is passed to psqlscan_emit().  The
 * macro relies on the "cur_state" local declared at the top of yylex().
 */
#define ECHO psqlscan_emit(cur_state, yytext, yyleng)

%}
65 :
66 : %option reentrant
67 : %option bison-bridge
68 : %option 8bit
69 : %option never-interactive
70 : %option nodefault
71 : %option noinput
72 : %option nounput
73 : %option noyywrap
74 : %option warn
75 : %option prefix="psql_yy"
76 :
77 : /*
78 : * Set the type of yyextra; we use it as a pointer back to the containing
79 : * PsqlScanState.
80 : */
81 : %option extra-type="PsqlScanState"
82 :
83 : /*
84 : * All of the following definitions and rules should exactly match
85 : * src/backend/parser/scan.l so far as the flex patterns are concerned.
86 : * The rule bodies are just ECHO as opposed to what the backend does,
87 : * however. (But be sure to duplicate code that affects the lexing process,
88 : * such as BEGIN() and yyless().) Also, psqlscan uses a single <<EOF>> rule
89 : * whereas scan.l has a separate one for each exclusive state.
90 : */
91 :
92 : /*
93 : * OK, here is a short description of lex/flex rules behavior.
94 : * The longest pattern which matches an input string is always chosen.
95 : * For equal-length patterns, the first occurring in the rules list is chosen.
96 : * INITIAL is the starting state, to which all non-conditional rules apply.
97 : * Exclusive states change parsing rules while the state is active. When in
98 : * an exclusive state, only those rules defined for that state apply.
99 : *
100 : * We use exclusive states for quoted strings, extended comments,
101 : * and to eliminate parsing troubles for numeric strings.
102 : * Exclusive states:
103 : * <xb> bit string literal
104 : * <xc> extended C-style comments
105 : * <xd> delimited identifiers (double-quoted identifiers)
106 : * <xh> hexadecimal byte string
107 : * <xq> standard quoted strings
108 : * <xqs> quote stop (detect continued strings)
109 : * <xe> extended quoted strings (support backslash escape sequences)
110 : * <xdolq> $foo$ quoted strings
111 : * <xui> quoted identifier with Unicode escapes
112 : * <xus> quoted string with Unicode escapes
113 : *
114 : * Note: we intentionally don't mimic the backend's <xeu> state; we have
115 : * no need to distinguish it from <xe> state, and no good way to get out
116 : * of it in error cases. The backend just throws yyerror() in those
117 : * cases, but that's not an option here.
118 : */
119 :
120 : %x xb
121 : %x xc
122 : %x xd
123 : %x xh
124 : %x xq
125 : %x xqs
126 : %x xe
127 : %x xdolq
128 : %x xui
129 : %x xus
130 :
131 : /*
132 : * In order to make the world safe for Windows and Mac clients as well as
133 : * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
134 : * sequence will be seen as two successive newlines, but that doesn't cause
135 : * any problems. Comments that start with -- and extend to the next
136 : * newline are treated as equivalent to a single whitespace character.
137 : *
138 : * NOTE a fine point: if there is no newline following --, we will absorb
139 : * everything to the end of the input as a comment. This is correct. Older
140 : * versions of Postgres failed to recognize -- as a comment if the input
141 : * did not end with a newline.
142 : *
143 : * non_newline_space tracks all space characters except newlines.
144 : *
145 : * XXX if you change the set of whitespace characters, fix scanner_isspace()
146 : * to agree.
147 : */
148 :
149 : space [ \t\n\r\f\v]
150 : non_newline_space [ \t\f\v]
151 : newline [\n\r]
152 : non_newline [^\n\r]
153 :
154 : comment ("--"{non_newline}*)
155 :
156 : whitespace ({space}+|{comment})
157 :
158 : /*
159 : * SQL requires at least one newline in the whitespace separating
160 : * string literals that are to be concatenated. Silly, but who are we
161 : * to argue? Note that {whitespace_with_newline} should not have * after
162 : * it, whereas {whitespace} should generally have a * after it...
163 : */
164 :
165 : special_whitespace ({space}+|{comment}{newline})
166 : non_newline_whitespace ({non_newline_space}|{comment})
167 : whitespace_with_newline ({non_newline_whitespace}*{newline}{special_whitespace}*)
168 :
169 : quote '
170 : /* If we see {quote} then {quotecontinue}, the quoted string continues */
171 : quotecontinue {whitespace_with_newline}{quote}
172 :
173 : /*
174 : * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
175 : * {quotecontinue}. It might seem that this could just be {whitespace}*,
176 : * but if there's a dash after {whitespace_with_newline}, it must be consumed
177 : * to see if there's another dash --- which would start a {comment} and thus
178 : * allow continuation of the {quotecontinue} token.
179 : */
180 : quotecontinuefail {whitespace}*"-"?
181 :
182 : /* Bit string
183 : * It is tempting to scan the string for only those characters
184 : * which are allowed. However, this leads to silently swallowed
185 : * characters if illegal characters are included in the string.
186 : * For example, if xbinside is [01] then B'ABCD' is interpreted
187 : * as a zero-length string, and the ABCD' is lost!
188 : * Better to pass the string forward and let the input routines
189 : * validate the contents.
190 : */
191 : xbstart [bB]{quote}
192 : xbinside [^']*
193 :
194 : /* Hexadecimal byte string */
195 : xhstart [xX]{quote}
196 : xhinside [^']*
197 :
198 : /* National character */
199 : xnstart [nN]{quote}
200 :
201 : /* Quoted string that allows backslash escapes */
202 : xestart [eE]{quote}
203 : xeinside [^\\']+
204 : xeescape [\\][^0-7]
205 : xeoctesc [\\][0-7]{1,3}
206 : xehexesc [\\]x[0-9A-Fa-f]{1,2}
207 : xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
208 : xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
209 :
210 : /* Extended quote
211 : * xqdouble implements embedded quote, ''''
212 : */
213 : xqstart {quote}
214 : xqdouble {quote}{quote}
215 : xqinside [^']+
216 :
217 : /* $foo$ style quotes ("dollar quoting")
218 : * The quoted string starts with $foo$ where "foo" is an optional string
219 : * in the form of an identifier, except that it may not contain "$",
220 : * and extends to the first occurrence of an identical string.
221 : * There is *no* processing of the quoted text.
222 : *
223 : * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
224 : * fails to match its trailing "$".
225 : */
226 : dolq_start [A-Za-z\200-\377_]
227 : dolq_cont [A-Za-z\200-\377_0-9]
228 : dolqdelim \$({dolq_start}{dolq_cont}*)?\$
229 : dolqfailed \${dolq_start}{dolq_cont}*
230 : dolqinside [^$]+
231 :
232 : /* Double quote
233 : * Allows embedded spaces and other special characters into identifiers.
234 : */
235 : dquote \"
236 : xdstart {dquote}
237 : xdstop {dquote}
238 : xddouble {dquote}{dquote}
239 : xdinside [^"]+
240 :
241 : /* Quoted identifier with Unicode escapes */
242 : xuistart [uU]&{dquote}
243 :
244 : /* Quoted string with Unicode escapes */
245 : xusstart [uU]&{quote}
246 :
247 : /* error rule to avoid backup */
248 : xufailed [uU]&
249 :
250 :
251 : /* C-style comments
252 : *
253 : * The "extended comment" syntax closely resembles allowable operator syntax.
254 : * The tricky part here is to get lex to recognize a string starting with
255 : * slash-star as a comment, when interpreting it as an operator would produce
256 : * a longer match --- remember lex will prefer a longer match! Also, if we
257 : * have something like plus-slash-star, lex will think this is a 3-character
258 : * operator whereas we want to see it as a + operator and a comment start.
259 : * The solution is two-fold:
260 : * 1. append {op_chars}* to xcstart so that it matches as much text as
261 : * {operator} would. Then the tie-breaker (first matching rule of same
262 : * length) ensures xcstart wins. We put back the extra stuff with yyless()
263 : * in case it contains a star-slash that should terminate the comment.
264 : * 2. In the operator rule, check for slash-star within the operator, and
265 : * if found throw it back with yyless(). This handles the plus-slash-star
266 : * problem.
267 : * Dash-dash comments have similar interactions with the operator rule.
268 : */
269 : xcstart \/\*{op_chars}*
270 : xcstop \*+\/
271 : xcinside [^*/]+
272 :
273 : ident_start [A-Za-z\200-\377_]
274 : ident_cont [A-Za-z\200-\377_0-9\$]
275 :
276 : identifier {ident_start}{ident_cont}*
277 :
278 : /* Assorted special-case operators and operator-like tokens */
279 : typecast "::"
280 : dot_dot \.\.
281 : colon_equals ":="
282 :
283 : /*
284 : * These operator-like tokens (unlike the above ones) also match the {operator}
285 : * rule, which means that they might be overridden by a longer match if they
286 : * are followed by a comment start or a + or - character. Accordingly, if you
287 : * add to this list, you must also add corresponding code to the {operator}
288 : * block to return the correct token in such cases. (This is not needed in
289 : * psqlscan.l since the token value is ignored there.)
290 : */
291 : equals_greater "=>"
292 : less_equals "<="
293 : greater_equals ">="
294 : less_greater "<>"
295 : not_equals "!="
296 : /* Note there is no need for left_arrow, since "<-" is not a single operator. */
297 : right_arrow "->"
298 :
299 : /*
300 : * "self" is the set of chars that should be returned as single-character
301 : * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
302 : * which can be one or more characters long (but if a single-char token
303 : * appears in the "self" set, it is not to be returned as an Op). Note
304 : * that the sets overlap, but each has some chars that are not in the other.
305 : *
306 : * If you change either set, adjust the character lists appearing in the
307 : * rule for "operator"!
308 : */
309 : self [,()\[\].;\:\|\+\-\*\/\%\^\<\>\=]
310 : op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
311 : operator {op_chars}+
312 :
313 : /*
314 : * Numbers
315 : *
316 : * Unary minus is not part of a number here. Instead we pass it separately to
317 : * the parser, and there it gets coerced via doNegate().
318 : *
319 : * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
320 : *
321 : * {realfail} is added to prevent the need for scanner
322 : * backup when the {real} rule fails to match completely.
323 : */
324 : decdigit [0-9]
325 : hexdigit [0-9A-Fa-f]
326 : octdigit [0-7]
327 : bindigit [0-1]
328 :
329 : decinteger {decdigit}(_?{decdigit})*
330 : hexinteger 0[xX](_?{hexdigit})+
331 : octinteger 0[oO](_?{octdigit})+
332 : bininteger 0[bB](_?{bindigit})+
333 :
334 : hexfail 0[xX]_?
335 : octfail 0[oO]_?
336 : binfail 0[bB]_?
337 :
338 : numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
339 : numericfail {decinteger}\.\.
340 :
341 : real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
342 : realfail ({decinteger}|{numeric})[Ee][-+]
343 :
344 : /* Positional parameters don't accept underscores. */
345 : param \${decdigit}+
346 :
347 : /*
348 : * An identifier immediately following an integer literal is disallowed because
349 : * in some cases it's ambiguous what is meant: for example, 0x1234 could be
350 : * either a hexinteger or a decinteger "0" and an identifier "x1234". We can
351 : * detect such problems by seeing if integer_junk matches a longer substring
352 : * than any of the XXXinteger patterns (decinteger, hexinteger, octinteger,
353 : * bininteger). One "junk" pattern is sufficient because
354 : * {decinteger}{identifier} will match all the same strings we'd match with
355 : * {hexinteger}{identifier} etc.
356 : *
357 : * Note that the rule for integer_junk must appear after the ones for
358 : * XXXinteger to make this work correctly: 0x1234 will match both hexinteger
359 : * and integer_junk, and we need hexinteger to be chosen in that case.
360 : *
361 : * Also disallow strings matched by numeric_junk, real_junk and param_junk
362 : * for consistency.
363 : */
364 : integer_junk {decinteger}{identifier}
365 : numeric_junk {numeric}{identifier}
366 : real_junk {real}{identifier}
367 : param_junk \${decdigit}+{identifier}
368 :
369 : /* psql-specific: characters allowed in variable names */
370 : variable_char [A-Za-z\200-\377_0-9]
371 :
372 : other .
373 :
374 : /*
375 : * Dollar quoted strings are totally opaque, and no escaping is done on them.
376 : * Other quoted strings must allow some special characters such as single-quote
377 : * and newline.
378 : * Embedded single-quotes are implemented both in the SQL standard
379 : * style of two adjacent single quotes "''" and in the Postgres/Java style
380 : * of escaped-quote "\'".
381 : * Other embedded escaped characters are matched explicitly and the leading
382 : * backslash is dropped from the string.
383 : * Note that xcstart must appear before operator, as explained above!
384 : * Also whitespace (comment) must appear before operator.
385 : */
386 :
387 : %%
388 :
%{
		/* Locals visible to every rule action in yylex() */
		PsqlScanState cur_state = yyextra;
		PQExpBuffer output_buf = cur_state->output_buf;

		/*
		 * Each call starts in the start condition recorded in start_state.
		 * That lets the support functions choose a new starting state
		 * without poking at flex internals, and it lets lexing move from
		 * one flex lexer to another so that different parts of the source
		 * string can be handled by separate lexers.
		 */
		BEGIN(cur_state->start_state);
%}
403 :
{whitespace}	{
					/*
					 * {whitespace} covers true whitespace as well as
					 * single-line ("--" style) comments.  It is copied to
					 * the output only once some data has already been
					 * collected there, so leading whitespace is dropped.
					 * (This interacts with decisions made in MainLoop();
					 * see there for details.)
					 */
					if (output_buf->len > 0)
					{
						ECHO;
					}
				}
415 :
{xcstart}		{
					/* Outermost comment opener: nesting depth starts at zero */
					BEGIN(xc);
					cur_state->xcdepth = 0;
					/* Keep only the slash-star; anything after it is rescanned */
					yyless(2);
					ECHO;
				}

<xc>{
{xcstart}		{
					/* A nested comment opener goes one level deeper */
					cur_state->xcdepth += 1;
					/* Keep only the slash-star; anything after it is rescanned */
					yyless(2);
					ECHO;
				}

{xcstop}		{
					/*
					 * Closing a nested level just decrements the depth;
					 * closing the outermost level leaves comment state.
					 */
					if (cur_state->xcdepth > 0)
						cur_state->xcdepth--;
					else
						BEGIN(INITIAL);
					ECHO;
				}

{xcinside}		|
{op_chars}		|
\*+				{
					/* Comment content is copied through unchanged */
					ECHO;
				}
} /* <xc> */
452 0 :
{xbstart}		{
					/* Bit-string literal: b'...' */
					BEGIN(xb);
					ECHO;
				}
<xh>{xhinside}	|
<xb>{xbinside}	{
					ECHO;
				}

{xhstart}		{
					/*
					 * Hexadecimal string literal: enter <xh> and copy the
					 * opening x' through unchanged.
					 */
					BEGIN(xh);
					ECHO;
				}

{xnstart}		{
					yyless(1);	/* eat only 'n' this time */
					ECHO;
				}

{xqstart}		{
					/*
					 * A plain quote starts a standard string when std_strings
					 * is set, and an extended (backslash-escape) string
					 * otherwise.
					 */
					BEGIN(cur_state->std_strings ? xq : xe);
					ECHO;
				}
{xestart}		{
					/* e'...' is always an extended string */
					BEGIN(xe);
					ECHO;
				}
{xusstart}		{
					/* u&'...' string with Unicode escapes */
					BEGIN(xus);
					ECHO;
				}
493 :
<xb,xh,xq,xe,xus>{quote} {
					/*
					 * An end quote inside any quoted-string state might be
					 * followed by a continuation, so look ahead before
					 * deciding that the string is finished.  Remember which
					 * state we came from; the lookahead for every string
					 * type shares the single <xqs> state, which keeps the
					 * lexer tables small.
					 */
					cur_state->state_before_str_stop = YYSTATE;
					BEGIN(xqs);
					ECHO;
				}
<xqs>{quotecontinue} {
					/*
					 * The literal continues: go back to the state we were
					 * in before the end quote and keep scanning.
					 */
					BEGIN(cur_state->state_before_str_stop);
					ECHO;
				}
<xqs>{quotecontinuefail} |
<xqs>{other}	{
					/*
					 * No continuation, so the end quote really did end the
					 * string.  Push back everything matched here and resume
					 * normal scanning; there is nothing to echo.
					 */
					yyless(0);
					BEGIN(INITIAL);
				}
527 :
<xq,xe,xus>{xqdouble} |
<xq,xus>{xqinside} |
<xe>{xeinside}	|
<xe>{xeunicode}	|
<xe>{xeunicodefail} |
<xe>{xeescape}	|
<xe>{xeoctesc}	|
<xe>{xehexesc}	|
<xe>.			{
					/*
					 * String contents are copied through as-is.  The final
					 * catch-all is only needed for \ just before EOF.
					 */
					ECHO;
				}
556 :
{dolqdelim}		{
					/* Save the opening delimiter so the closer can be compared */
					cur_state->dolqstart = pg_strdup(yytext);
					BEGIN(xdolq);
					ECHO;
				}
{dolqfailed}	{
					/* throw back all but the initial "$" */
					yyless(1);
					ECHO;
				}
<xdolq>{dolqdelim} {
					if (strcmp(yytext, cur_state->dolqstart) != 0)
					{
						/*
						 * Not our delimiter.  Emit the $... part but put
						 * the final $ back for rescanning, since it may
						 * begin the real closing delimiter.  Consider
						 * $delim$...$junk$delim$
						 */
						yyless(yyleng - 1);
					}
					else
					{
						/* Matching delimiter: the dollar-quoted string ends */
						free(cur_state->dolqstart);
						cur_state->dolqstart = NULL;
						BEGIN(INITIAL);
					}
					ECHO;
				}
<xdolq>{dolqinside} |
<xdolq>{dolqfailed} |
<xdolq>.		{
					/* The catch-all is only needed for $ inside the quoted text */
					ECHO;
				}
595 :
{xdstart}		{
					/* Opening double quote of a delimited identifier */
					BEGIN(xd);
					ECHO;
				}
{xuistart}		{
					/* u&"..." identifier with Unicode escapes */
					BEGIN(xui);
					ECHO;
				}
<xd>{xdstop}	|
<xui>{dquote}	{
					/* Closing double quote: back to normal scanning */
					BEGIN(INITIAL);
					ECHO;
				}
<xd,xui>{xddouble} |
<xd,xui>{xdinside} {
					ECHO;
				}

{xufailed}	{
					/* throw back all but the initial u/U */
					yyless(1);
					ECHO;
				}
624 :
{typecast}		|
{dot_dot}		|
{colon_equals}	|
{equals_greater} |
{less_equals}	|
{greater_equals} |
{less_greater}	|
{not_equals}	|
{right_arrow}	{
					/* The token value is ignored here, so these are all just echoed */
					ECHO;
				}
660 :
661 773 : /*
662 : * These rules are specific to psql --- they implement parenthesis
663 : * counting and detection of command-ending semicolon. These must
664 : * appear before the {self} rule so that they take precedence over it.
665 : */
666 :
667 253327 : "(" {
668 : cur_state->paren_depth++;
669 253327 : ECHO;
670 253327 : }
671 :
672 253327 : ")" {
673 253318 : if (cur_state->paren_depth > 0)
674 253318 : cur_state->paren_depth--;
675 253318 : ECHO;
676 253318 : }
677 :
678 253318 : ";" {
679 241897 : ECHO;
680 241897 : if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
681 241897 : {
682 : /* Terminate lexing temporarily */
683 : cur_state->start_state = YY_START;
684 241739 : cur_state->identifier_count = 0;
685 241739 : return LEXRES_SEMI;
686 241739 : }
687 : }
688 :
689 158 : /*
690 : * psql-specific rules to handle backslash commands and variable
691 : * substitution. We want these before {self}, also.
692 : */
693 :
694 512 : "\\"[;:] {
695 : /* Force a semi-colon or colon into the query buffer */
696 : psqlscan_emit(cur_state, yytext + 1, 1);
697 512 : if (yytext[1] == ';')
698 512 : cur_state->identifier_count = 0;
699 512 : }
700 :
701 512 : "\\" {
702 32239 : /* Terminate lexing temporarily */
703 : cur_state->start_state = YY_START;
704 32239 : return LEXRES_BACKSLASH;
705 32239 : }
706 :
707 : :{variable_char}+ {
708 1778 : /* Possible psql variable substitution */
709 : char *varname;
710 : char *value;
711 :
712 : varname = psqlscan_extract_substring(cur_state,
713 1778 : yytext + 1,
714 1778 : yyleng - 1);
715 1778 : if (cur_state->callbacks->get_variable)
716 1778 : value = cur_state->callbacks->get_variable(varname,
717 1182 : PQUOTE_PLAIN,
718 : cur_state->cb_passthrough);
719 : else
720 : value = NULL;
721 596 :
722 : if (value)
723 1778 : {
724 : /* It is a variable, check for recursion */
725 : if (psqlscan_var_is_current_source(cur_state, varname))
726 874 : {
727 : /* Recursive expansion --- don't go there */
728 : pg_log_warning("skipping recursive expansion of variable \"%s\"",
729 0 : varname);
730 : /* Instead copy the string as is */
731 : ECHO;
732 0 : }
733 : else
734 : {
735 : /* OK, perform substitution */
736 : psqlscan_push_new_buffer(cur_state, value, varname);
737 874 : /* yy_scan_string already made buffer active */
738 : }
739 : free(value);
740 874 : }
741 : else
742 : {
743 : /*
744 : * if the variable doesn't exist we'll copy the string
745 : * as is
746 : */
747 : ECHO;
748 904 : }
749 :
750 : free(varname);
751 1778 : }
752 :
753 1778 : :'{variable_char}+' {
754 612 : psqlscan_escape_variable(cur_state, yytext, yyleng,
755 612 : PQUOTE_SQL_LITERAL);
756 : }
757 :
758 612 : :\"{variable_char}+\" {
759 21 : psqlscan_escape_variable(cur_state, yytext, yyleng,
760 21 : PQUOTE_SQL_IDENT);
761 : }
762 :
763 21 : :\{\?{variable_char}+\} {
764 8 : psqlscan_test_variable(cur_state, yytext, yyleng);
765 8 : }
766 :
767 8 : /*
768 : * These rules just avoid the need for scanner backup if one of the
769 : * three rules above fails to match completely.
770 : */
771 :
772 0 : :'{variable_char}* {
773 : /* Throw back everything but the colon */
774 : yyless(1);
775 0 : ECHO;
776 0 : }
777 :
778 0 : :\"{variable_char}* {
779 0 : /* Throw back everything but the colon */
780 : yyless(1);
781 0 : ECHO;
782 0 : }
783 :
784 0 : :\{\?{variable_char}* {
785 0 : /* Throw back everything but the colon */
786 : yyless(1);
787 0 : ECHO;
788 0 : }
789 : :\{ {
790 0 : /* Throw back everything but the colon */
791 0 : yyless(1);
792 0 : ECHO;
793 0 : }
794 :
795 0 : /*
796 : * Back to backend-compatible rules.
797 : */
798 :
799 440997 : {self} {
800 : ECHO;
801 440997 : }
802 :
803 440997 : {operator} {
804 12735 : /*
805 : * Check for embedded slash-star or dash-dash; those
806 : * are comment starts, so operator must stop there.
807 : * Note that slash-star or dash-dash at the first
808 : * character will match a prior rule, not this one.
809 : */
810 : int nchars = yyleng;
811 12735 : char *slashstar = strstr(yytext, "/*");
812 12735 : char *dashdash = strstr(yytext, "--");
813 12735 :
814 : if (slashstar && dashdash)
815 12735 : {
816 : /* if both appear, take the first one */
817 : if (slashstar > dashdash)
818 0 : slashstar = dashdash;
819 0 : }
820 : else if (!slashstar)
821 12735 : slashstar = dashdash;
822 12695 : if (slashstar)
823 12735 : nchars = slashstar - yytext;
824 48 :
825 : /*
826 : * For SQL compatibility, '+' and '-' cannot be the
827 : * last char of a multi-char operator unless the operator
828 : * contains chars that are not in SQL operators.
829 : * The idea is to lex '=-' as two operators, but not
830 : * to forbid operator names like '?-' that could not be
831 : * sequences of SQL operators.
832 : */
833 : if (nchars > 1 &&
834 12735 : (yytext[nchars - 1] == '+' ||
835 11706 : yytext[nchars - 1] == '-'))
836 11702 : {
837 : int ic;
838 :
839 : for (ic = nchars - 2; ic >= 0; ic--)
840 385 : {
841 : char c = yytext[ic];
842 326 : if (c == '~' || c == '!' || c == '@' ||
843 326 : c == '#' || c == '^' || c == '&' ||
844 270 : c == '|' || c == '`' || c == '?' ||
845 106 : c == '%')
846 : break;
847 : }
848 : if (ic < 0)
849 291 : {
850 : /*
851 : * didn't find a qualifying character, so remove
852 : * all trailing [+-]
853 : */
854 : do {
855 : nchars--;
856 59 : } while (nchars > 1 &&
857 59 : (yytext[nchars - 1] == '+' ||
858 23 : yytext[nchars - 1] == '-'));
859 23 : }
860 : }
861 :
862 : if (nchars < yyleng)
863 12735 : {
864 : /* Strip the unwanted chars from the token */
865 : yyless(nchars);
866 107 : }
867 : ECHO;
868 12735 : }
869 :
{param}			|
{param_junk}	|
{decinteger}	|
{hexinteger}	|
{octinteger}	|
{bininteger}	|
{hexfail}		|
{octfail}		|
{binfail}		|
{numeric}		{
					/* Literals and their "fail" variants are simply echoed */
					ECHO;
				}
{numericfail}	{
					/* throw back the .., and treat as integer */
					yyless(yyleng - 2);
					ECHO;
				}
{real}			|
{realfail}		|
{integer_junk}	|
{numeric_junk}	|
{real_junk}		{
					ECHO;
				}
921 :
922 0 :
{identifier}	{
					/*
					 * Semicolons inside a BEGIN .. END block of a function
					 * definition must not terminate the whole statement, so
					 * we have to know when we are inside such a block.
					 * Rather than writing a full parser, use a heuristic:
					 * note whether the statement starts with CREATE [OR
					 * REPLACE] {FUNCTION|PROCEDURE}, recording the first
					 * letter of each of those keywords by position.
					 */
					bool		is_prefix_keyword;
					bool		in_create_routine;

					/* First identifier of a statement: clear the recorded prefix */
					if (cur_state->identifier_count == 0)
						memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));

					is_prefix_keyword =
						(pg_strcasecmp(yytext, "create") == 0 ||
						 pg_strcasecmp(yytext, "function") == 0 ||
						 pg_strcasecmp(yytext, "procedure") == 0 ||
						 pg_strcasecmp(yytext, "or") == 0 ||
						 pg_strcasecmp(yytext, "replace") == 0);

					/* Record the keyword's initial if its position is still tracked */
					if (is_prefix_keyword &&
						cur_state->identifier_count < sizeof(cur_state->identifiers))
						cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);

					cur_state->identifier_count++;

					/* "c f", "c p", "c o r f" or "c o r p", outside any parentheses */
					in_create_routine =
						(cur_state->identifiers[0] == 'c' &&
						 (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
						  (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
						   (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
						 cur_state->paren_depth == 0);

					if (in_create_routine)
					{
						if (pg_strcasecmp(yytext, "begin") == 0)
						{
							cur_state->begin_depth++;
						}
						else if (pg_strcasecmp(yytext, "case") == 0)
						{
							/*
							 * CASE also ends with END.  We only need to
							 * track this if we are already inside a BEGIN.
							 */
							if (cur_state->begin_depth >= 1)
								cur_state->begin_depth++;
						}
						else if (pg_strcasecmp(yytext, "end") == 0)
						{
							/* Guard against an END with no open block */
							if (cur_state->begin_depth > 0)
								cur_state->begin_depth--;
						}
					}

					ECHO;
				}
975 :
{other}			{
					ECHO;
				}

<<EOF>>			{
					if (cur_state->buffer_stack != NULL)
					{
						/*
						 * We were expanding a variable, so pop the inclusion
						 * stack and keep lexing
						 */
						psqlscan_pop_buffer_stack(cur_state);
						psqlscan_select_top_buffer(cur_state);
					}
					else
					{
						/* end of input reached; remember where to resume */
						cur_state->start_state = YY_START;
						return LEXRES_EOL;
					}
				}
994 :
995 874 : %%
996 0 :
997 : /* LCOV_EXCL_STOP */
998 :
999 : /*
1000 : * Create a lexer working state struct.
1001 : *
1002 : * callbacks is a struct of function pointers that encapsulate some
1003 : * behavior we need from the surrounding program. This struct must
1004 : * remain valid for the lifespan of the PsqlScanState.
1005 : */
1006 : PsqlScanState
1007 : psql_scan_create(const PsqlScanCallbacks *callbacks)
1008 10250 : {
1009 : PsqlScanState state;
1010 :
1011 : state = pg_malloc0_object(PsqlScanStateData);
1012 10250 :
1013 : state->callbacks = callbacks;
1014 10250 :
1015 : yylex_init(&state->scanner);
1016 10250 :
1017 : yyset_extra(state, state->scanner);
1018 10250 :
1019 : psql_scan_reset(state);
1020 10250 :
1021 : return state;
1022 10250 : }
1023 :
1024 : /*
1025 : * Destroy a lexer working state struct, releasing all resources.
1026 : */
1027 : void
1028 : psql_scan_destroy(PsqlScanState state)
1029 10193 : {
1030 : psql_scan_finish(state);
1031 10193 :
1032 : psql_scan_reset(state);
1033 10193 :
1034 : yylex_destroy(state->scanner);
1035 10193 :
1036 : free(state);
1037 10193 : }
1038 10193 :
1039 : /*
1040 : * Set the callback passthrough pointer for the lexer.
1041 : *
1042 : * This could have been integrated into psql_scan_create, but keeping it
1043 : * separate allows the application to change the pointer later, which might
1044 : * be useful.
1045 : */
1046 : void
1047 : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
1048 9647 : {
1049 : state->cb_passthrough = passthrough;
1050 9647 : }
1051 9647 :
1052 : /*
1053 : * Set up to perform lexing of the given input line.
1054 : *
1055 : * The text at *line, extending for line_len bytes, will be scanned by
1056 : * subsequent calls to the psql_scan routines. psql_scan_finish should
1057 : * be called when scanning is complete. Note that the lexer retains
1058 : * a pointer to the storage at *line --- this string must not be altered
1059 : * or freed until after psql_scan_finish is called.
1060 : *
1061 : * encoding is the libpq identifier for the character encoding in use,
1062 : * and std_strings says whether standard_conforming_strings is on.
1063 : */
1064 : void
1065 : psql_scan_setup(PsqlScanState state,
1066 513794 : const char *line, int line_len,
1067 : int encoding, bool std_strings)
1068 : {
1069 : /* Mustn't be scanning already */
1070 : Assert(state->scanbufhandle == NULL);
1071 : Assert(state->buffer_stack == NULL);
1072 :
1073 : /* Do we need to hack the character set encoding? */
1074 : state->encoding = encoding;
1075 513794 : state->safe_encoding = pg_valid_server_encoding_id(encoding);
1076 513794 :
1077 : /* Save standard-strings flag as well */
1078 : state->std_strings = std_strings;
1079 513794 :
1080 : /* Set up flex input buffer with appropriate translation and padding */
1081 : state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
1082 513794 : &state->scanbuf);
1083 : state->scanline = line;
1084 513794 :
1085 : /* Set lookaside data in case we have to map unsafe encoding */
1086 : state->curline = state->scanbuf;
1087 513794 : state->refline = state->scanline;
1088 513794 :
1089 : /* Initialize state for psql_scan_get_location() */
1090 : state->cur_line_no = 0; /* yylex not called yet */
1091 513794 : state->cur_line_ptr = state->scanbuf;
1092 513794 : }
1093 513794 :
1094 : /*
1095 : * Do lexical analysis of SQL command text.
1096 : *
1097 : * The text previously passed to psql_scan_setup is scanned, and appended
1098 : * (possibly with transformation) to query_buf.
1099 : *
1100 : * The return value indicates the condition that stopped scanning:
1101 : *
1102 : * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
1103 : * transferred to query_buf.) The command accumulated in query_buf should
1104 : * be executed, then clear query_buf and call again to scan the remainder
1105 : * of the line.
1106 : *
1107 : * PSCAN_BACKSLASH: found a backslash that starts a special command.
1108 : * Any previous data on the line has been transferred to query_buf.
1109 : * The caller will typically next apply a separate flex lexer to scan
1110 : * the special command.
1111 : *
1112 : * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1113 : * incomplete SQL command. *prompt is set to the appropriate prompt type.
1114 : *
1115 : * PSCAN_EOL: the end of the line was reached, and there is no lexical
1116 : * reason to consider the command incomplete. The caller may or may not
1117 : * choose to send it. *prompt is set to the appropriate prompt type if
1118 : * the caller chooses to collect more input.
1119 : *
1120 : * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1121 : * be called next, then the cycle may be repeated with a fresh input line.
1122 : *
1123 : * In all cases, *prompt is set to an appropriate prompt type code for the
1124 : * next line-input operation.
1125 : */
1126 : PsqlScanResult
1127 : psql_scan(PsqlScanState state,
1128 787457 : PQExpBuffer query_buf,
1129 : promptStatus_t *prompt)
1130 : {
1131 : PsqlScanResult result;
1132 : int lexresult;
1133 :
1134 : /* Must be scanning already */
1135 : Assert(state->scanbufhandle != NULL);
1136 :
1137 : /* Set current output target */
1138 : state->output_buf = query_buf;
1139 787457 :
1140 : /* Set input source */
1141 : if (state->buffer_stack != NULL)
1142 787457 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
1143 60 : else
1144 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1145 787397 :
1146 : /* And lex. */
1147 : lexresult = yylex(NULL, state->scanner);
1148 787457 :
1149 : /* Notify psql_scan_get_location() that a yylex call has been made. */
1150 : if (state->cur_line_no == 0)
1151 787457 : state->cur_line_no = 1;
1152 513792 :
1153 : /*
1154 : * Check termination state and return appropriate result info.
1155 : */
1156 : switch (lexresult)
1157 787457 : {
1158 : case LEXRES_EOL: /* end of input */
1159 513479 : switch (state->start_state)
1160 513479 : {
1161 : case INITIAL:
1162 482046 : case xqs: /* we treat this like INITIAL */
1163 : if (state->paren_depth > 0)
1164 482046 : {
1165 : result = PSCAN_INCOMPLETE;
1166 42303 : *prompt = PROMPT_PAREN;
1167 42303 : }
1168 : else if (state->begin_depth > 0)
1169 439743 : {
1170 : result = PSCAN_INCOMPLETE;
1171 665 : *prompt = PROMPT_CONTINUE;
1172 665 : }
1173 : else if (query_buf->len > 0)
1174 439078 : {
1175 : result = PSCAN_EOL;
1176 93156 : *prompt = PROMPT_CONTINUE;
1177 93156 : }
1178 : else
1179 : {
1180 : /* never bother to send an empty buffer */
1181 : result = PSCAN_INCOMPLETE;
1182 345922 : *prompt = PROMPT_READY;
1183 345922 : }
1184 : break;
1185 482046 : case xb:
1186 0 : result = PSCAN_INCOMPLETE;
1187 0 : *prompt = PROMPT_SINGLEQUOTE;
1188 0 : break;
1189 0 : case xc:
1190 510 : result = PSCAN_INCOMPLETE;
1191 510 : *prompt = PROMPT_COMMENT;
1192 510 : break;
1193 510 : case xd:
1194 23 : result = PSCAN_INCOMPLETE;
1195 23 : *prompt = PROMPT_DOUBLEQUOTE;
1196 23 : break;
1197 23 : case xh:
1198 0 : result = PSCAN_INCOMPLETE;
1199 0 : *prompt = PROMPT_SINGLEQUOTE;
1200 0 : break;
1201 0 : case xe:
1202 301 : result = PSCAN_INCOMPLETE;
1203 301 : *prompt = PROMPT_SINGLEQUOTE;
1204 301 : break;
1205 301 : case xq:
1206 7022 : result = PSCAN_INCOMPLETE;
1207 7022 : *prompt = PROMPT_SINGLEQUOTE;
1208 7022 : break;
1209 7022 : case xdolq:
1210 23577 : result = PSCAN_INCOMPLETE;
1211 23577 : *prompt = PROMPT_DOLLARQUOTE;
1212 23577 : break;
1213 23577 : case xui:
1214 0 : result = PSCAN_INCOMPLETE;
1215 0 : *prompt = PROMPT_DOUBLEQUOTE;
1216 0 : break;
1217 0 : case xus:
1218 0 : result = PSCAN_INCOMPLETE;
1219 0 : *prompt = PROMPT_SINGLEQUOTE;
1220 0 : break;
1221 0 : default:
1222 0 : /* can't get here */
1223 : fprintf(stderr, "invalid YY_START\n");
1224 0 : exit(1);
1225 0 : }
1226 : break;
1227 513479 : case LEXRES_SEMI: /* semicolon */
1228 241739 : result = PSCAN_SEMICOLON;
1229 241739 : *prompt = PROMPT_READY;
1230 241739 : break;
1231 241739 : case LEXRES_BACKSLASH: /* backslash */
1232 32239 : result = PSCAN_BACKSLASH;
1233 32239 : *prompt = PROMPT_READY;
1234 32239 : break;
1235 32239 : default:
1236 0 : /* can't get here */
1237 : fprintf(stderr, "invalid yylex result\n");
1238 0 : exit(1);
1239 0 : }
1240 :
1241 : return result;
1242 787457 : }
1243 :
1244 : /*
1245 : * Clean up after scanning a string. This flushes any unread input and
1246 : * releases resources (but not the PsqlScanState itself). Note however
1247 : * that this does not reset the lexer scan state; that can be done by
1248 : * psql_scan_reset(), which is an orthogonal operation.
1249 : *
1250 : * It is legal to call this when not scanning anything (makes it easier
1251 : * to deal with error recovery).
1252 : */
1253 : void
1254 : psql_scan_finish(PsqlScanState state)
1255 523611 : {
1256 : /* Drop any incomplete variable expansions. */
1257 : while (state->buffer_stack != NULL)
1258 523611 : psqlscan_pop_buffer_stack(state);
1259 0 :
1260 : /* Done with the outer scan buffer, too */
1261 : if (state->scanbufhandle)
1262 523611 : yy_delete_buffer(state->scanbufhandle, state->scanner);
1263 513738 : state->scanbufhandle = NULL;
1264 523611 : if (state->scanbuf)
1265 523611 : free(state->scanbuf);
1266 513738 : state->scanbuf = NULL;
1267 523611 : }
1268 523611 :
1269 : /*
1270 : * Reset lexer scanning state to start conditions. This is appropriate
1271 : * for executing \r psql commands (or any other time that we discard the
1272 : * prior contents of query_buf). It is not, however, necessary to do this
1273 : * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1274 : * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1275 : * conditions are returned.
1276 : *
1277 : * Note that this is unrelated to flushing unread input; that task is
1278 : * done by psql_scan_finish().
1279 : */
1280 : void
1281 : psql_scan_reset(PsqlScanState state)
1282 22542 : {
1283 : state->start_state = INITIAL;
1284 22542 : state->paren_depth = 0;
1285 22542 : state->xcdepth = 0; /* not really necessary */
1286 22542 : if (state->dolqstart)
1287 22542 : free(state->dolqstart);
1288 0 : state->dolqstart = NULL;
1289 22542 : state->identifier_count = 0;
1290 22542 : state->begin_depth = 0;
1291 22542 : }
1292 22542 :
1293 : /*
1294 : * Reselect this lexer (psqlscan.l) after using another one.
1295 : *
1296 : * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
1297 : * state, because we'd never switch to another lexer in a different state.
1298 : * However, we don't want to reset e.g. paren_depth, so this can't be
1299 : * the same as psql_scan_reset().
1300 : *
1301 : * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1302 : * must be a superset of this.
1303 : *
1304 : * Note: it seems likely that other lexers could just assign INITIAL for
1305 : * themselves, since that probably has the value zero in every flex-generated
1306 : * lexer. But let's not assume that.
1307 : */
1308 : void
1309 : psql_scan_reselect_sql_lexer(PsqlScanState state)
1310 153193 : {
1311 : state->start_state = INITIAL;
1312 153193 : }
1313 153193 :
1314 : /*
1315 : * Return true if lexer is currently in an "inside quotes" state.
1316 : *
1317 : * This is pretty grotty but is needed to preserve the old behavior
1318 : * that mainloop.c drops blank lines not inside quotes without even
1319 : * echoing them.
1320 : */
1321 : bool
1322 : psql_scan_in_quote(PsqlScanState state)
1323 99000 : {
1324 : return state->start_state != INITIAL &&
1325 99602 : state->start_state != xqs;
1326 602 : }
1327 :
1328 : /*
1329 : * Return the current scanning location (end+1 of last scanned token),
1330 : * as a line number counted from 1 and an offset from string start.
1331 : *
1332 : * This considers only the outermost input string, and therefore is of
1333 : * limited use for programs that use psqlscan_push_new_buffer().
1334 : *
1335 : * It would be a bit easier probably to use "%option yylineno" to count
1336 : * lines, but the flex manual says that has a performance cost, and only
1337 : * a minority of programs using psqlscan have need for this functionality.
1338 : * So we implement it ourselves without adding overhead to the lexer itself.
1339 : */
1340 : void
1341 : psql_scan_get_location(PsqlScanState state,
1342 1737 : int *lineno, int *offset)
1343 : {
1344 : const char *line_end;
1345 :
1346 : /*
1347 : * We rely on flex's having stored a NUL after the current token in
1348 : * scanbuf. Therefore we must specially handle the state before yylex()
1349 : * has been called, when obviously that won't have happened yet.
1350 : */
1351 : if (state->cur_line_no == 0)
1352 1737 : {
1353 : *lineno = 1;
1354 0 : *offset = 0;
1355 0 : return;
1356 0 : }
1357 :
1358 : /*
1359 : * Advance cur_line_no/cur_line_ptr past whatever has been lexed so far.
1360 : * Doing this prevents repeated calls from being O(N^2) for long inputs.
1361 : */
1362 : while ((line_end = strchr(state->cur_line_ptr, '\n')) != NULL)
1363 2210 : {
1364 : state->cur_line_no++;
1365 473 : state->cur_line_ptr = line_end + 1;
1366 473 : }
1367 : state->cur_line_ptr += strlen(state->cur_line_ptr);
1368 1737 :
1369 : /* Report current location. */
1370 : *lineno = state->cur_line_no;
1371 1737 : *offset = state->cur_line_ptr - state->scanbuf;
1372 1737 : }
1373 :
1374 : /*
1375 : * Push the given string onto the stack of stuff to scan.
1376 : *
1377 : * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1378 : */
1379 : void
1380 : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
1381 874 : const char *varname)
1382 : {
1383 : StackElem *stackelem;
1384 :
1385 : stackelem = pg_malloc_object(StackElem);
1386 874 :
1387 : /*
1388 : * In current usage, the passed varname points at the current flex input
1389 : * buffer; we must copy it before calling psqlscan_prepare_buffer()
1390 : * because that will change the buffer state.
1391 : */
1392 : stackelem->varname = varname ? pg_strdup(varname) : NULL;
1393 874 :
1394 : stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
1395 874 : &stackelem->bufstring);
1396 : state->curline = stackelem->bufstring;
1397 874 : if (state->safe_encoding)
1398 874 : {
1399 : stackelem->origstring = NULL;
1400 874 : state->refline = stackelem->bufstring;
1401 874 : }
1402 : else
1403 : {
1404 : stackelem->origstring = pg_strdup(newstr);
1405 0 : state->refline = stackelem->origstring;
1406 0 : }
1407 : stackelem->next = state->buffer_stack;
1408 874 : state->buffer_stack = stackelem;
1409 874 : }
1410 874 :
1411 : /*
1412 : * Pop the topmost buffer stack item (there must be one!)
1413 : *
1414 : * NB: after this, the flex input state is unspecified; caller must
1415 : * switch to an appropriate buffer to continue lexing.
1416 : * See psqlscan_select_top_buffer().
1417 : */
1418 : void
1419 : psqlscan_pop_buffer_stack(PsqlScanState state)
1420 874 : {
1421 : StackElem *stackelem = state->buffer_stack;
1422 874 :
1423 : state->buffer_stack = stackelem->next;
1424 874 : yy_delete_buffer(stackelem->buf, state->scanner);
1425 874 : free(stackelem->bufstring);
1426 874 : if (stackelem->origstring)
1427 874 : free(stackelem->origstring);
1428 0 : if (stackelem->varname)
1429 874 : free(stackelem->varname);
1430 874 : free(stackelem);
1431 874 : }
1432 874 :
1433 : /*
1434 : * Select the topmost surviving buffer as the active input.
1435 : */
1436 : void
1437 : psqlscan_select_top_buffer(PsqlScanState state)
1438 874 : {
1439 : StackElem *stackelem = state->buffer_stack;
1440 874 :
1441 : if (stackelem != NULL)
1442 874 : {
1443 : yy_switch_to_buffer(stackelem->buf, state->scanner);
1444 0 : state->curline = stackelem->bufstring;
1445 0 : state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
1446 0 : }
1447 : else
1448 : {
1449 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1450 874 : state->curline = state->scanbuf;
1451 874 : state->refline = state->scanline;
1452 874 : }
1453 : }
1454 874 :
1455 : /*
1456 : * Check if specified variable name is the source for any string
1457 : * currently being scanned
1458 : */
1459 : bool
1460 : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
1461 874 : {
1462 : StackElem *stackelem;
1463 :
1464 : for (stackelem = state->buffer_stack;
1465 874 : stackelem != NULL;
1466 874 : stackelem = stackelem->next)
1467 0 : {
1468 : if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1469 0 : return true;
1470 0 : }
1471 : return false;
1472 874 : }
1473 :
1474 : /*
1475 : * Set up a flex input buffer to scan the given data. We always make a
1476 : * copy of the data. If working in an unsafe encoding, the copy has
1477 : * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1478 : *
1479 : * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1480 : */
1481 : YY_BUFFER_STATE
1482 : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
1483 514668 : char **txtcopy)
1484 : {
1485 : char *newtxt;
1486 :
1487 : /* Flex wants two \0 characters after the actual data */
1488 : newtxt = pg_malloc_array(char, (len + 2));
1489 514668 : *txtcopy = newtxt;
1490 514668 : newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1491 514668 :
1492 : if (state->safe_encoding)
1493 514668 : memcpy(newtxt, txt, len);
1494 514528 : else
1495 : {
1496 : /* Gotta do it the hard way */
1497 : int i = 0;
1498 140 :
1499 : while (i < len)
1500 808 : {
1501 : int thislen = PQmblen(txt + i, state->encoding);
1502 668 :
1503 : /* first byte should always be okay... */
1504 : newtxt[i] = txt[i];
1505 668 : i++;
1506 668 : while (--thislen > 0 && i < len)
1507 808 : newtxt[i++] = (char) 0xFF;
1508 140 : }
1509 : }
1510 :
1511 : return yy_scan_buffer(newtxt, len + 2, state->scanner);
1512 514668 : }
1513 :
1514 : /*
1515 : * psqlscan_emit() --- body for ECHO macro
1516 : *
1517 : * NB: this must be used for ALL and ONLY the text copied from the flex
1518 : * input data. If you pass it something that is not part of the yytext
1519 : * string, you are making a mistake. Internally generated text can be
1520 : * appended directly to state->output_buf.
1521 : */
1522 : void
1523 : psqlscan_emit(PsqlScanState state, const char *txt, int len)
1524 6504358 : {
1525 : PQExpBuffer output_buf = state->output_buf;
1526 6504358 :
1527 : if (state->safe_encoding)
1528 6504358 : appendBinaryPQExpBuffer(output_buf, txt, len);
1529 6503882 : else
1530 : {
1531 : /* Gotta do it the hard way */
1532 : const char *reference = state->refline;
1533 476 : int i;
1534 :
1535 : reference += (txt - state->curline);
1536 476 :
1537 : for (i = 0; i < len; i++)
1538 1277 : {
1539 : char ch = txt[i];
1540 801 :
1541 : if (ch == (char) 0xFF)
1542 801 : ch = reference[i];
1543 140 : appendPQExpBufferChar(output_buf, ch);
1544 801 : }
1545 : }
1546 : }
1547 6504358 :
1548 : /*
1549 : * psqlscan_extract_substring --- fetch value of (part of) the current token
1550 : *
1551 : * This is like psqlscan_emit(), except that the data is returned as a
1552 : * malloc'd string rather than being pushed directly to state->output_buf.
1553 : */
1554 : char *
1555 : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
1556 3315 : {
1557 : char *result = pg_malloc_array(char, (len + 1));
1558 3315 :
1559 : if (state->safe_encoding)
1560 3315 : memcpy(result, txt, len);
1561 3315 : else
1562 : {
1563 : /* Gotta do it the hard way */
1564 : const char *reference = state->refline;
1565 0 : int i;
1566 :
1567 : reference += (txt - state->curline);
1568 0 :
1569 : for (i = 0; i < len; i++)
1570 0 : {
1571 : char ch = txt[i];
1572 0 :
1573 : if (ch == (char) 0xFF)
1574 0 : ch = reference[i];
1575 0 : result[i] = ch;
1576 0 : }
1577 : }
1578 : result[len] = '\0';
1579 3315 : return result;
1580 3315 : }
1581 :
1582 : /*
1583 : * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1584 : *
1585 : * If the variable name is found, escape its value using the appropriate
1586 : * quoting method and emit the value to output_buf. (Since the result is
1587 : * surely quoted, there is never any reason to rescan it.) If we don't
1588 : * find the variable or escaping fails, emit the token as-is.
1589 : */
1590 : void
1591 : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
1592 677 : PsqlScanQuoteType quote)
1593 : {
1594 : char *varname;
1595 : char *value;
1596 :
1597 : /* Variable lookup. */
1598 : varname = psqlscan_extract_substring(state, txt + 2, len - 3);
1599 677 : if (state->callbacks->get_variable)
1600 677 : value = state->callbacks->get_variable(varname, quote,
1601 677 : state->cb_passthrough);
1602 : else
1603 : value = NULL;
1604 0 : free(varname);
1605 677 :
1606 : if (value)
1607 677 : {
1608 : /* Emit the suitably-escaped value */
1609 : appendPQExpBufferStr(state->output_buf, value);
1610 640 : free(value);
1611 640 : }
1612 : else
1613 : {
1614 : /* Emit original token as-is */
1615 : psqlscan_emit(state, txt, len);
1616 37 : }
1617 : }
1618 677 :
1619 : void
1620 : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
1621 21 : {
1622 : char *varname;
1623 : char *value;
1624 :
1625 : varname = psqlscan_extract_substring(state, txt + 3, len - 4);
1626 21 : if (state->callbacks->get_variable)
1627 21 : value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
1628 21 : state->cb_passthrough);
1629 : else
1630 : value = NULL;
1631 0 : free(varname);
1632 21 :
1633 : if (value != NULL)
1634 21 : {
1635 : appendPQExpBufferStr(state->output_buf, "TRUE");
1636 9 : free(value);
1637 9 : }
1638 : else
1639 : {
1640 : appendPQExpBufferStr(state->output_buf, "FALSE");
1641 12 : }
1642 : }
1643 21 : /* END: function "psqlscan_test_variable" */
|